From 5bbac56d2a331b1b3f15b95d39bc957738fec00f Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Mon, 21 Mar 2022 15:20:07 +0900
Subject: [PATCH 01/70] aist new license

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index 3269683..e81a63a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2019 National Institute of Advanced Industrial Science and Technology (AIST)
+   Copyright National Institute of Advanced Industrial Science and Technology (AIST), AIST-Product-ID: 2022PRO-2776
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

From 230cf364c99995595b54784a70e9c80b4dfee028 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Mon, 21 Mar 2022 15:20:35 +0900
Subject: [PATCH 02/70] setup environment via conda

---
 README.md               | 35 ++++++++++++++++++++++++++++-------
 setup/conda-activate.sh |  6 ++++++
 setup/conda-create.sh   |  7 +++++++
 setup/conda-install.sh  |  9 +++++++++
 setup/install-brat.sh   | 10 ++++++++++
 5 files changed, 60 insertions(+), 7 deletions(-)
 create mode 100644 setup/conda-activate.sh
 create mode 100644 setup/conda-create.sh
 create mode 100644 setup/conda-install.sh
 create mode 100644 setup/install-brat.sh

diff --git a/README.md b/README.md
index f10213e..e669e34 100644
--- a/README.md
+++ b/README.md
@@ -31,9 +31,36 @@ A deep learning model to predict named entities, triggers, and nested events fro
 - [Our scores](https://b2share.eudat.eu/api/files/3cf6c1f4-5eed-4ee3-99c5-d99f5f011be3/scores.tar.gz)
 
 # 2. Preparation
-## 2.1. Requirements
+## 2.1. Environment
+
+1. Install conda environment
+
+```bash
+sh setup/conda-install.sh
+```
+
+2. Create a conda environment (for the first time)
+
+```bash
+. setup/conda-create.sh
+```
+
+3. Activate the conda environment
+
+- Run this every time: before installing packages, before running evaluation scripts, etc.
+
+```bash
+. setup/conda-activate.sh
+```
+
+4. Install requirements
 - Python 3.6.5
 - PyTorch (torch==1.1.0 torchvision==0.3.0, cuda92)
+- Python dependencies
+
+```bash
+pip install -r requirements.txt
+```
 
 ```bash
 virtualenv -p python3 pytorch-env
 source pytorch-env/bin/activate
 export CUDA_VISIBLE_DEVICES=0
 CUDA_PATH=/usr/local/cuda pip install torch==1.1.0 torchvision==0.3.0
 ```
 
-- Install Python packages
-
-```bash
-sh install.sh
-```
-
 ## 2.2. BERT
 
 - Download SciBERT BERT model from PyTorch AllenNLP
diff --git a/setup/conda-activate.sh b/setup/conda-activate.sh
new file mode 100644
index 0000000..06ded76
--- /dev/null
+++ b/setup/conda-activate.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+ROOT=$PWD
+source $ROOT/miniconda3/bin/activate deepeventmine-conda-env
+
+echo "Activated conda environment: deepeventmine-conda-env"
\ No newline at end of file
diff --git a/setup/conda-create.sh b/setup/conda-create.sh
new file mode 100644
index 0000000..c9fd537
--- /dev/null
+++ b/setup/conda-create.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+ROOT=$PWD
+source $ROOT/miniconda3/bin/activate
+conda create -n deepeventmine-conda-env python=3.6
+
+echo "Created conda environment: deepeventmine-conda-env"
\ No newline at end of file
diff --git a/setup/conda-install.sh b/setup/conda-install.sh
new file mode 100644
index 0000000..40a0d2d
--- /dev/null
+++ b/setup/conda-install.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+ROOT=$PWD
+
+# install conda
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+bash Miniconda3-latest-Linux-x86_64.sh -p $ROOT/miniconda3
+
+echo "Done!"
\ No newline at end of file
diff --git a/setup/install-brat.sh b/setup/install-brat.sh
new file mode 100644
index 0000000..bf72df2
--- /dev/null
+++ b/setup/install-brat.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+ROOT=$PWD
+BRAT_DIR="$ROOT/brat"
+
+git clone https://github.com/nlplab/brat.git
+
+cd $BRAT_DIR
+./install.sh -u
+
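Note: a quick sanity check for the environment that patch 02 sets up. This is a minimal sketch, not part of the series; it assumes the default install prefix ./miniconda3 and the environment name deepeventmine-conda-env used by the scripts above.

```bash
# activate the environment created by setup/conda-create.sh
source ./miniconda3/bin/activate deepeventmine-conda-env

# the interpreter should resolve inside the new environment and report Python 3.6.x
which python
python --version
```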
From 8bbf011a448ebc7012e501bd5fd9531ce05c6cc5 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Mon, 21 Mar 2022 15:26:30 +0900
Subject: [PATCH 03/70] update requirements

---
 requirements.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 9e5c209..a74894f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,4 +10,6 @@ texttable
 sklearn
 pytorch-nlp
 tqdm
-requests
\ No newline at end of file
+requests
+torch
+torchvision
\ No newline at end of file

From e1a812d10457037886ed0ca67686855e54e08ea7 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Mon, 21 Mar 2022 15:28:08 +0900
Subject: [PATCH 04/70] torch version

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index a74894f..d31bf44 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,5 +11,5 @@ sklearn
 pytorch-nlp
 tqdm
 requests
-torch
-torchvision
\ No newline at end of file
+torch==1.1.0
+torchvision==0.3.0
\ No newline at end of file
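Note: once `pip install -r requirements.txt` has run inside the environment, the pins from patches 03-04 can be verified with a one-liner. The `cuda` flag may legitimately print False on a CPU-only machine, since availability depends on the local driver rather than on the installed wheel.

```bash
python -c "import torch, torchvision; print('torch', torch.__version__, 'torchvision', torchvision.__version__, 'cuda', torch.cuda.is_available())"
```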
From 1428e995e17f2b8f0da010f98ec5fb236be0aab5 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Mon, 21 Mar 2022 15:46:22 +0900
Subject: [PATCH 05/70] prepare for training cg

---
 README.md                  | 69 +++++++++++++++++---------------------
 run/train/download-bert.sh | 18 ++++++++++
 run/train/prepare-cg.sh    | 63 ++++++++++++++++++++++++++++++++++
 3 files changed, 112 insertions(+), 38 deletions(-)
 create mode 100644 run/train/download-bert.sh
 create mode 100644 run/train/prepare-cg.sh

diff --git a/README.md b/README.md
index e669e34..82d0c43 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,12 @@
-# 1. DeepEventMine
+# DeepEventMine
 A deep learning model to predict named entities, triggers, and nested events from biomedical texts.
 - The model and results are reported in our paper: [DeepEventMine: End-to-end Neural Nested Event Extraction from Biomedical Texts](https://doi.org/10.1093/bioinformatics/btaa540), Bioinformatics, 2020.
 
-## 1.1. Features
+## Overview
+1. Features
 - Based on [pre-trained BERT](https://github.com/allenai/scibert)
 - Predict nested entities and nested events
 - Provide our trained models on the seven biomedical tasks
 - Predict for new data given raw text input or PubMed ID
 - Visualize the predicted entities and events on the [brat](http://brat.nlplab.org)
 
-## 1.2. Tasks
+2. Tasks
 
 - DeepEventMine has been trained and evaluated on the following tasks (six BioNLP shared tasks and MLEE).
 
 1. cg: [Cancer Genetics (CG), 2013](https://sites.google.com/site/bionlpst2013/tasks/cancer-genetics)
 2. ge11: [GENIA Event Extraction (GENIA), 2011](http://2011.bionlp-st.org/home/genia-event-extraction-genia)
 3. id: [Infectious Diseases (ID), 2011](http://2011.bionlp-st.org/home/infectious-diseases)
 4. epi: [Epigenetics and Post-translational Modifications (EPI), 2011](http://2011.bionlp-st.org/home/epigenetics-and-post-translational-modifications)
 5. ge13: [GENIA Event Extraction (GENIA), 2013](http://2013.bionlp-st.org/tasks/genia-event-extraction-genia)
 6. pc: [Pathway Curation (PC), 2013](http://2013.bionlp-st.org/tasks/pathway-curation)
 7. mlee: [Multi-Level Event Extraction (MLEE)](http://nactem.ac.uk/MLEE/)
 
-## 1.3. Our trained models and scores
-
-- [Our trained models](https://b2share.eudat.eu/records/80d2de0c57d64419b722dc1afa375f28)
-- [Our scores](https://b2share.eudat.eu/api/files/3cf6c1f4-5eed-4ee3-99c5-d99f5f011be3/scores.tar.gz)
-
-# 2. Preparation
-## 2.1. Environment
-
+# 1. Preparation
 1. Install conda environment
 
 ```bash
 sh setup/conda-install.sh
 ```
 
 pip install -r requirements.txt
 ```
 
-```bash
-virtualenv -p python3 pytorch-env
-source pytorch-env/bin/activate
-export CUDA_VISIBLE_DEVICES=0
-CUDA_PATH=/usr/local/cuda pip install torch==1.1.0 torchvision==0.3.0
-```
-
+5. [Brat](https://github.com/nlplab/brat) for visualization
+- [brat instructions](http://brat.nlplab.org/installation.html)
 
 ```bash
-sh download.sh bert
+sh setup/install-brat.sh
+python2 standalone.py
 ```
 
-## 2.3. DeepEventMine
-- Download pre-trained DeepEventMine model on a given task
-- [task] = cg (or pc, ge11, epi, etc)
+# 2. Training CG
+1. Download data and process
+- Download data
+- Process data to appropriate format
+- Tokenize texts and retrieve offsets
+- Data statistics
+- Download the processed event structures
 
 ```bash
-sh download.sh deepeventmine [task]
+sh run/train/prepare-cg.sh
 ```
 
-## 2.4 Brat
-- To visualize the output using the [brat](http://brat.nlplab.org)
-- Download [brat v1.3](http://brat.nlplab.org)
-
+2. Download models
+- Download SciBERT model from PyTorch AllenNLP
 ```bash
-sh download.sh brat
+sh run/train/download-bert.sh
 ```
 
-- Install brat based on the [brat instructions](http://brat.nlplab.org/installation.html)
-```bash
-cd brat/brat-v1.3_Crunchy_Frog/
-./install.sh -u
-python2 standalone.py
-```
 
 # 3. Predict (BioNLP tasks)
 
 1. Download corpora
 
 ```bash
 sh download.sh bionlp [task]
 ```
 
-2. Preprocess data
+2. Download our pre-trained DeepEventMine model on a given task
+- [Our trained models](https://b2share.eudat.eu/records/80d2de0c57d64419b722dc1afa375f28)
+- [Our scores](https://b2share.eudat.eu/api/files/3cf6c1f4-5eed-4ee3-99c5-d99f5f011be3/scores.tar.gz)
+- [task] = cg (or pc, ge11, epi, etc)
+
+```bash
+sh download.sh deepeventmine [task]
+```
+
+3. Preprocess data
 - Tokenize texts and prepare data for prediction
 ```bash
 sh preprocess.sh bionlp
 ```
-3. Generate configs
+4. Generate configs
 - If using GPU: [gpu] = 0, otherwise: [gpu] = -1
 - [task] = cg, pc, etc
 ```bash
diff --git a/run/train/download-bert.sh b/run/train/download-bert.sh
new file mode 100644
index 0000000..0990c9d
--- /dev/null
+++ b/run/train/download-bert.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+root_dir=$PWD
+
+# download BERT model
+BERT_DIR="data/bert"
+mkdir -p $BERT_DIR
+cd $BERT_DIR
+
+wget https://s3-us-west-2.amazonaws.com/ai2-s2-research/scibert/pytorch_models/scibert_scivocab_cased.tar
+tar -xvf scibert_scivocab_cased.tar
+rm *.tar
+
+cd scibert_scivocab_cased
+tar -xzvf weights.tar.gz
+rm *.tar.gz
+
+cd $root_dir
diff --git a/run/train/prepare-cg.sh b/run/train/prepare-cg.sh
new file mode 100644
index 0000000..8464d86
--- /dev/null
+++ b/run/train/prepare-cg.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# to root
+ROOT=$PWD
+
+# 1. DOWNLOAD DATA
+echo "-----------------------------"
+echo "1. Download CG data"
+
+CG_DIR="data/original_corpora/cg"
+
+mkdir -p $CG_DIR
+
+cd $CG_DIR
+
+wget http://2013.bionlp-st.org/tasks/BioNLP-ST_2013_CG_training_data.tar.gz
+wget http://2013.bionlp-st.org/tasks/BioNLP-ST_2013_CG_development_data.tar.gz
+wget http://2013.bionlp-st.org/tasks/BioNLP-ST_2013_CG_test_data.tar.gz
+
+tar -xzvf BioNLP-ST_2013_CG_training_data.tar.gz
+tar -xzvf BioNLP-ST_2013_CG_development_data.tar.gz
+tar -xzvf BioNLP-ST_2013_CG_test_data.tar.gz
+
+mv BioNLP-ST_2013_CG_training_data train
+mv BioNLP-ST_2013_CG_development_data dev
+mv BioNLP-ST_2013_CG_test_data test
+
+rm *.tar.gz
+
+cd $ROOT
+
+# 2. TOKENIZATION
+echo "-----------------------------"
+echo "2. Tokenization"
+
+input_dir=data/original_corpora/
+output_dir=data/corpora/
+python scripts/preprocess.py \
+    --indir $input_dir \
+    --outdir $output_dir
+
+cd $ROOT
+
+# 3. EVENT STRUCTURES
+echo "-----------------------------"
+echo "3. Download processed event structures"
+
+# our processed event structures data
+FILEID=1qzjY38RdnCvomVAD3XMk_J4pi1gIraA5
+
+CG_EVENT_STRUCTURES="data/general_rules"
+mkdir -p $CG_EVENT_STRUCTURES
+cd $CG_EVENT_STRUCTURES
+
+gdown https://drive.google.com/uc?id=$FILEID
+tar -xzvf cg.tar.gz
+rm *.tar.gz
+
+cd $ROOT
+
+# DONE
+echo "-----------------------------"
+echo "Done!"
\ No newline at end of file
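Note: after `sh run/train/prepare-cg.sh` completes, the corpus layout can be spot-checked before training. This sketch assumes only the directories created by the script above (data/original_corpora/cg for the raw download, data/corpora/cg for the tokenized output that the training configs later point at).

```bash
# each split should contain matching .txt/.a1/.a2 brat files
for split in train dev test; do
    echo "$split: $(ls data/corpora/cg/$split | wc -l) files"
done
ls data/corpora/cg/train | head -n 5
```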
From e5202ca8ce38ef454fbc4a6d0a24f65aeb6899f4 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Mon, 21 Mar 2022 15:48:43 +0900
Subject: [PATCH 06/70] event structures

---
 run/train/prepare-cg.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run/train/prepare-cg.sh b/run/train/prepare-cg.sh
index 8464d86..9c4183f 100644
--- a/run/train/prepare-cg.sh
+++ b/run/train/prepare-cg.sh
@@ -48,7 +48,7 @@ echo "3. Download processed event structures"
 # our processed event structures data
 FILEID=1qzjY38RdnCvomVAD3XMk_J4pi1gIraA5
 
-CG_EVENT_STRUCTURES="data/general_rules"
+CG_EVENT_STRUCTURES="data/event-structures"
 mkdir -p $CG_EVENT_STRUCTURES
 cd $CG_EVENT_STRUCTURES
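Note: prepare-cg.sh (patches 05-06) fetches the processed event structures with `gdown`, which does not appear in the portion of requirements.txt shown in patches 03-04. If the command is missing, install it into the same environment before running the script; this is a sketch under the assumption that a recent gdown release can fetch a public Drive file by id.

```bash
pip install gdown
# re-run only the download step, using the same FILEID as the script
gdown "https://drive.google.com/uc?id=1qzjY38RdnCvomVAD3XMk_J4pi1gIraA5"
```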
From c889d8aaa44f203fb8da8e88bf90453aa121618d Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Mon, 21 Mar 2022 15:56:26 +0900
Subject: [PATCH 07/70] generate training configs

---
 README.md                     |   5 +
 configs/cg.yaml               |   4 +-
 configs/default.yaml          | 130 +++++++++++++++--
 run/train/generate-configs.sh |   9 ++
 scripts/generate_configs.py   | 267 ++++++++++++++++++++++++----------
 5 files changed, 317 insertions(+), 98 deletions(-)
 create mode 100644 run/train/generate-configs.sh

diff --git a/README.md b/README.md
index 82d0c43..f8fba47 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,11 @@ sh run/train/prepare-cg.sh
 sh run/train/download-bert.sh
 ```
 
+3. Generate configs
+```bash
+sh run/train/generate_configs.sh cg basic
+```
+- Experiment name: basic, exp1, exp2, etc
 
 # 3. Predict (BioNLP tasks)
 
diff --git a/configs/cg.yaml b/configs/cg.yaml
index 187de59..9b05ddd 100644
--- a/configs/cg.yaml
+++ b/configs/cg.yaml
@@ -1,2 +1,2 @@
-# Configs for specific task
-a2_entities: [DNA_domain_or_region,Protein_domain_or_region]
\ No newline at end of file
+# Configuration for CG task.
+a2_entities: [DNA_domain_or_region, Protein_domain_or_region]
\ No newline at end of file
diff --git a/configs/default.yaml b/configs/default.yaml
index 67c56ca..8e02934 100644
--- a/configs/default.yaml
+++ b/configs/default.yaml
@@ -1,25 +1,123 @@
-# Configuration file.
+# Configuration file for training model.
 
+# data path
 task_name: cg
+train_data: data/corpora/cg/train/
+dev_data: data/corpora/cg/dev/
+test_data: data/corpora/cg/dev/
 
 bert_model: data/bert/scibert_scivocab_cased
-model_path: data/models/cg/model/
-saved_params: data/models/cg/cg.param
 
-# data
-test_data: ..
+
+# saving options
+result_dir: ..
+save_params: False +save_ner: False +save_rel: False +save_ev: False +save_all_models: False +save_model_pipeline: False + +# hyperparameters +epoch: 100 +batchsize: 16 +learning_rate: 3e-5 +dropout: 0.3 +fp16: False +gpu: 0 + +# dimensions +bert_dim: 768 +hidden_dim: 1000 +etype_dim: 300 # entity type +rtype_dim: 150 # relation type +role_dim: 1000 # event argument + +ner_reduce: False +ner_reduced_size: 500 +rel_reduced_size: 500 +ev_reduced_size: 500 + +# bert +seed: 42 +gradient_accumulation_steps: 1 +loss_scale: 128 +warmup_proportion: 0.1 +max_seq: 128 +bert_warmup_lr: True + +# ner config +ner_label_limit: 2 # nested level +ner_threshold: 0.5 +max_entity_width: 14 +max_trigger_width: 10 + +# event config +ev_threshold: 0.5 +max_ev_level: 3 # nested level +max_ev_args: 4 # maximum number of arguments +ev_nested_epoch: 0 +modality_epoch: 0 +flat_ev_scale: 1 +nest_ev_scale: 1 +modality_weight: 1 +use_dev_rule: False +use_general_rule: False +rule_dir: data/event-structures/cg/ -# eval -ev_eval_script_path: eval/scripts/eval-ev-cg.py +# training options +use_gold_ner: False +use_gold_rel: False +ner_predict_all: True # True/False: predict entity or not +skip_ner: False +skip_rel: False +ner_epoch: 100 +rel_epoch: 100 + +freeze_bert: False +freeze_ner: False +freeze_rel: False + +ner_epoch_limit: -1 +rel_epoch_limit: -1 +ner_loss_weight_main: 1 +rel_loss_weight_minor: 1 +ev_loss_weight_minor: 1 +ner_loss_weight_minor: 1 +rel_loss_weight_main: 1 +ev_loss_weight_main: 1 + +direction: l2r+r2l +lab2ign: 1:Other:2 +include_nested: True +enable_triggers_pair: True +train: True +rel_lb_weight: -1 + +lowercase : False +filter_no_ent_sents: False +use_context: True +min_w_freq: 1 # for singletonW +unk_w_prob: 0.01 # for pre_sentences + +# lstm +use_lstm: False +pretrain_word_model: word-embs/PubMed-shuffle-win-2-sample200.txt + +# evaluation +ner_eval_corpus: cg +rel_eval_script_path: eval/scripts/n2c2.py +ev_eval_script_path: eval/scripts/eval-ev-cg.py # for event +ev_matching: -sp # s: softboundary, p: partialrecursive a2_entities: [] -raw_text: False -ner_predict_all: False -# output -result_dir: .. 
+# prediction setting +predict: False +pipelines: False +t_batch_size: 16 +t_gpu: 0 +t_fp16: False +gold_eval: False -# params -gpu: -1 -batchsize: 16 -seed: 42 \ No newline at end of file +# display options +stats: True # print relations +show_macro: False # print result for relations \ No newline at end of file diff --git a/run/train/generate-configs.sh b/run/train/generate-configs.sh new file mode 100644 index 0000000..c2df32d --- /dev/null +++ b/run/train/generate-configs.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +experiment_dir="experiments/" +mkdir -p $experiment_dir + +task=$1 +setting=$2 + +python scripts/generate_configs.py $experiment_dir $task $setting diff --git a/scripts/generate_configs.py b/scripts/generate_configs.py index 4e07162..b6707e9 100644 --- a/scripts/generate_configs.py +++ b/scripts/generate_configs.py @@ -10,63 +10,165 @@ def write_config(datapath, config): """Write config to file""" + # with open(datapath, 'w') as outfile: + # yaml.dump(config, outfile, default_flow_style=False, sort_keys=False) + with open(datapath, 'w') as outfile: for key, value in config.items(): # format - if key == 'bert_model' or key == 'test_data' or key == 'ev_eval_script_path' or key == 'result_dir' or key == 'gpu': + if key == 'result_dir': + outfile.write('\n') + if key == 'epoch': + outfile.write('\n') + if key == 'bert_dim': + outfile.write('\n') + if key == 'ner_reduce': + outfile.write('\n') + if key == 'seed': + outfile.write('\n') + if key == 'ner_label_limit': + outfile.write('\n') + if key == 'ev_threshold': + outfile.write('\n') + if key == 'use_gold_ner': + outfile.write('\n') + if key == 'freeze_bert': + outfile.write('\n') + if key == 'ner_epoch_limit': + outfile.write('\n') + if key == 'direction': + outfile.write('\n') + if key == 'ner_eval_corpus': + outfile.write('\n') + if key == 'predict': outfile.write('\n') outfile.write('{}: {}'.format(key, value)) outfile.write('\n') -def gen_predict_config(predict_config, specific_config, eval_set, config_dir, model_name, taskdir): - """For joint prediction""" +def gen_ner_config(ner_config, task_config, config_dir, taskdir): + """For entity""" - # dev and test sets - if eval_set == 'dev' or eval_set == 'test': - predict_config['test_data'] = ''.join(["data/corpora/", model_name, "/", eval_set, "/"]) - predict_config['result_dir'] = ''.join([taskdir, '/predict-gold-', eval_set, '/']) + ner_config['result_dir'] = ''.join([taskdir, 'ner/']) + ner_config['ner_model_dir'] = ''.join([taskdir, 'ner/model/']) + ner_config['save_ner'] = True - # overwrite task config - overwrite_task_config(predict_config, specific_config) + # overwrite task config + overwrite_task_config(ner_config, task_config) - write_config(os.path.join(config_dir, ''.join(['predict-gold-', eval_set, '.yaml'])), predict_config) + write_config(os.path.join(config_dir, 'train-ner.yaml'), ner_config) - # for raw texts - elif eval_set == 'raw-text': - predict_config['test_data'] = ''.join(["data/processed-raw-text/", model_name, "/"]) - predict_config['result_dir'] = ''.join([taskdir, '/predict-', eval_set, '/']) - predict_config['raw_text'] = True - predict_config['ner_predict_all'] = True - # overwrite task config - overwrite_task_config(predict_config, specific_config) +def gen_rel_config(rel_config, task_config, config_dir, taskdir): + """For relation""" - write_config(os.path.join(config_dir, ''.join(['predict-', eval_set, '.yaml'])), predict_config) + rel_config['result_dir'] = ''.join([taskdir, 'rel/']) + rel_config['rel_model_dir'] = ''.join([taskdir, 'rel/model/']) + 
rel_config['save_rel'] = True + rel_config['use_gold_ner'] = True + rel_config['use_gold_rel'] = False + rel_config['ner_predict_all'] = False + rel_config['skip_ner'] = True + rel_config['ner_epoch'] = -1 + # overwrite task config + overwrite_task_config(rel_config, task_config) + + write_config(os.path.join(config_dir, 'train-rel.yaml'), rel_config) + + +def gen_ev_config(ev_config, task_config, config_dir, taskdir): + """For event""" + + ev_config['result_dir'] = ''.join([taskdir, 'ev/']) + ev_config['ev_model_dir'] = ''.join([taskdir, 'ev/model/']) + ev_config['save_ev'] = True + ev_config['ev_nested_epoch'] = 20 + ev_config['modality_epoch'] = 20 + ev_config['use_general_rule'] = True + ev_config['use_gold_ner'] = True + ev_config['use_gold_rel'] = True + ev_config['ner_predict_all'] = False + ev_config['skip_ner'] = True + ev_config['skip_rel'] = True + ev_config['ner_epoch'] = -1 + ev_config['rel_epoch'] = -1 + ev_config['freeze_bert'] = True + ev_config['freeze_ner'] = True + ev_config['freeze_rel'] = True -def gen_predict_config_pubmed(predict_config, specific_config, config_dir, expdir, dataname): - predict_config['test_data'] = ''.join(["data/", dataname, "/processed-text/", "text/"]) - predict_config['result_dir'] = ''.join([expdir, dataname, '/results/']) - predict_config['raw_text'] = True - predict_config['ner_predict_all'] = True + # overwrite task config + overwrite_task_config(ev_config, task_config) + + write_config(os.path.join(config_dir, 'train-ev.yaml'), ev_config) + + +def gen_joint_config(joint_config, task_config, config_dir, taskdir): + """For joint""" + + joint_config['result_dir'] = ''.join([taskdir, 'joint-gold/']) + joint_config['joint_model_dir'] = ''.join([taskdir, 'joint-gold/model/']) + joint_config['save_params'] = True + joint_config['save_all_models'] = True + joint_config['use_general_rule'] = True + joint_config['ner_model_dir'] = ''.join([taskdir, 'ner/model/']) + joint_config['rel_model_dir'] = ''.join([taskdir, 'rel/model/']) + joint_config['ev_model_dir'] = ''.join([taskdir, 'ev/model/']) + joint_config['ner_predict_all'] = False + joint_config['ner_epoch'] = -1 + joint_config['rel_epoch'] = -1 + joint_config['ner_epoch_limit'] = 70 + joint_config['rel_epoch_limit'] = 90 + joint_config['rel_loss_weight_minor'] = 0.001 + joint_config['ev_loss_weight_minor'] = 0.001 + joint_config['ner_loss_weight_minor'] = 0.5 + joint_config['rel_loss_weight_main'] = 0.5 + joint_config['ev_loss_weight_main'] = 0.1 # overwrite task config - overwrite_task_config(predict_config, specific_config) - write_config(os.path.join(config_dir, ''.join(['predict-', dataname, '.yaml'])), predict_config) + overwrite_task_config(joint_config, task_config) + write_config(os.path.join(config_dir, 'train-joint-gold.yaml'), joint_config) -def overwrite_task_config(config, specific_config): - """Overwrite config for specific task.""" - # add specific task config - for key, value in specific_config.items(): - if key in config: - config[key] = value +def gen_joint_e2e_config(joint_e2e_config, task_config, config_dir, taskdir): + """For joint end-to-end""" - return config + joint_e2e_config['result_dir'] = ''.join([taskdir, 'joint-e2e/']) + joint_e2e_config['joint_model_dir'] = ''.join([taskdir, 'joint-e2e/model/']) + joint_e2e_config['ner_predict_all'] = True + + # overwrite task config + overwrite_task_config(joint_e2e_config, task_config) + + write_config(os.path.join(config_dir, 'train-joint-e2e.yaml'), joint_e2e_config) + + +def gen_predict_config(predict_config, eval_set, 
config_dir, taskdir): + """For joint prediction""" + + predict_config['test_data'] = predict_config['test_data'].replace('dev', eval_set) + predict_config['result_dir'] = ''.join([taskdir, 'predict-gold-', eval_set, '/']) + predict_config['save_params'] = False + predict_config['joint_model_dir'] = ''.join([taskdir, 'joint-gold/model/']) + predict_config['params'] = ''.join([taskdir, 'joint-gold/', predict_config['task_name'], '.param']) + predict_config['predict'] = True + predict_config['ner_predict_all'] = False + + write_config(os.path.join(config_dir, ''.join(['predict-gold-', eval_set, '.yaml'])), predict_config) + + +def gen_predict_e2e_config(predict_e2e_config, eval_set, config_dir, taskdir): + """For joint end-to-end prediction""" + + predict_e2e_config['result_dir'] = ''.join([taskdir, 'predict-e2e-', eval_set, '/']) + predict_e2e_config['joint_model_dir'] = ''.join([taskdir, 'joint-e2e/model/']) + predict_e2e_config['params'] = ''.join([taskdir, 'joint-e2e/', predict_e2e_config['task_name'], '.param']) + predict_e2e_config['ner_predict_all'] = True + + write_config(os.path.join(config_dir, ''.join(['predict-e2e-', eval_set, '.yaml'])), predict_e2e_config) def read_specific_config(task): @@ -85,11 +187,23 @@ def read_specific_config(task): return specific_config -def generate_configs(taskdir, task, gpu): +def overwrite_task_config(config, specific_config): + """Overwrite config for specific task.""" + + # add specific task config + for key, value in specific_config.items(): + if key in config: + config[key] = value + + return config + + +def generate_configs(expdir, task, setting): """Generate configs for all.""" # create experiment dir - config_dir = os.path.join(taskdir, 'configs') + taskdir = os.path.join(expdir, '/'.join([task, setting, ''])) + config_dir = os.path.join(expdir, '/'.join([task, setting, 'configs', ''])) utils.makedir(config_dir) # default setting @@ -102,54 +216,54 @@ def generate_configs(taskdir, task, gpu): # generate config for each task task_config = default_config.copy() - task_config['gpu'] = gpu - task_config['task_name'] = task_config['task_name'].replace('cg', task) - task_config['model_path'] = task_config['model_path'].replace('cg', task) - task_config['saved_params'] = task_config['saved_params'].replace('cg', task) - task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', task) - # predict config - predict_dev_config = task_config.copy() - gen_predict_config(predict_dev_config, specific_config, 'dev', config_dir, task, taskdir) + # generate data path + task_config['train_data'] = ''.join(["data/corpora/", task, "/train/"]) + task_config['dev_data'] = ''.join(["data/corpora/", task, "/dev/"]) + task_config['test_data'] = ''.join(["data/corpora/", task, "/dev/"]) + # bert + task_config['bert_model'] = "data/bert/scibert_scivocab_cased" - predict_test_config = task_config.copy() - gen_predict_config(predict_test_config, specific_config, 'test', config_dir, task, taskdir) + # task specific + task_config['task_name'] = task + task_config['ner_eval_corpus'] = task + task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', task) + task_config['rule_dir'] = task_config['rule_dir'].replace('cg', task) - # for raw text - predict_test_config = task_config.copy() - gen_predict_config(predict_test_config, specific_config, 'raw-text', config_dir, task, taskdir) + # ner config + ner_config = task_config.copy() + gen_ner_config(ner_config, specific_config, config_dir, taskdir) - print('Generate configs: 
Done!')
 
     return
 
 
-def generate_configs_pubmed(expdir, dataname, model_name, gpu):
-    """Generate configs for all."""
-
-    # create experiment dir
-    config_dir = os.path.join(expdir, ''.join([dataname, '/configs']))
-    utils.makedir(config_dir)
-
-    # default setting
-    default_config_path = 'configs/default.yaml'
-    with open(default_config_path, 'r') as stream:
-        default_config = utils._ordered_load(stream)
-
-    # read config for specific task
-    specific_config = read_specific_config(model_name)
-
-    # generate config for each task
-    task_config = default_config.copy()
-    task_config['gpu'] = gpu
-    task_config['task_name'] = task_config['task_name'].replace('cg', model_name)
-    task_config['model_path'] = task_config['model_path'].replace('cg', model_name)
-    task_config['saved_params'] = task_config['saved_params'].replace('cg', model_name)
-    task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', model_name)
-
-    # for raw text
-    predict_test_config = task_config.copy()
-    gen_predict_config_pubmed(predict_test_config, specific_config, config_dir, expdir, dataname)
 
     print('Generate configs: Done!')
 
     return
 
 
 if __name__ == '__main__':
-    # generate_configs_pubmed("experiments/", "cg", "my-pubmed", 0)
-
-    # bionlp
-    if len(sys.argv) == 4:
-        generate_configs(sys.argv[1], sys.argv[2], sys.argv[3])
-
-    # pubmed
-    elif len(sys.argv) == 5:
-        generate_configs_pubmed(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+    # generate_configs("experiments/", "cg", "basic")
+    generate_configs(sys.argv[1], sys.argv[2], sys.argv[3])
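Note: a small sketch for inspecting what patch 07 actually emits. The paths assume the defaults from the series (`experiments/` plus task `cg` and experiment name `basic`), and PyYAML is assumed to be available, since scripts/generate_configs.py already loads YAML through utils.

```python
# print a few of the keys written by gen_ner_config() in scripts/generate_configs.py
import yaml

with open('experiments/cg/basic/configs/train-ner.yaml') as f:
    config = yaml.safe_load(f)

for key in ('task_name', 'train_data', 'result_dir', 'ner_model_dir', 'save_ner'):
    print(key, '=', config.get(key))
```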
Generate configs +- Configs for training CG task ```bash sh run/train/generate_configs.sh cg basic ``` - Experiment name: basic, exp1, exp2, etc +- Or running this debug mode (on a small data with several epochs) +```bash +sh run/train/generate_configs.sh cg debug +``` # 3. Predict (BioNLP tasks) diff --git a/run/train/generate-configs-debug.sh b/run/train/generate-configs-debug.sh new file mode 100644 index 0000000..503c7e6 --- /dev/null +++ b/run/train/generate-configs-debug.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +TASK=$1 # cg +EXPERIMENT_NAME=$2 # basic + +EXPERIMENT_DIR="experiments/" +mkdir -p $EXPERIMENT_DIR + +python scripts/generate_configs.py \ + --experiment_dir $EXPERIMENT_DIR \ + --task_name $TASK \ + --experiment_name $EXPERIMENT_NAME \ + --debug_mode \ No newline at end of file diff --git a/run/train/generate-configs.sh b/run/train/generate-configs.sh index c2df32d..fa79fc4 100644 --- a/run/train/generate-configs.sh +++ b/run/train/generate-configs.sh @@ -1,9 +1,12 @@ #!/bin/bash -experiment_dir="experiments/" -mkdir -p $experiment_dir +TASK=$1 # cg +EXPERIMENT_NAME=$2 # basic -task=$1 -setting=$2 +EXPERIMENT_DIR="experiments/" +mkdir -p $EXPERIMENT_DIR -python scripts/generate_configs.py $experiment_dir $task $setting +python scripts/generate_configs.py \ + --experiment_dir $EXPERIMENT_DIR \ + --task_name $TASK \ + --experiment_name $EXPERIMENT_NAME \ diff --git a/scripts/generate_configs.py b/scripts/generate_configs.py index b6707e9..d77339c 100644 --- a/scripts/generate_configs.py +++ b/scripts/generate_configs.py @@ -2,6 +2,7 @@ import os import sys +import argparse sys.path.insert(0, '.') from utils import utils @@ -198,15 +199,26 @@ def overwrite_task_config(config, specific_config): return config -def generate_configs(expdir, task, setting): +def set_debug_mode(configs, args): + if args.debug_mode: + if "train_data" in configs: + configs['train_data'] = configs['train_data'].replace('train', "debug") + if "dev_data" in configs: + configs['dev_data'] = configs['dev_data'].replace('dev', "debug") + if "test_data" in configs: + configs['test_data'] = configs['test_data'].replace('test', "debug") + if "epoch" in configs: + configs["epoch"] = 2 + +def generate_configs(args, expdir, task, exp_name): """Generate configs for all.""" # create experiment dir - taskdir = os.path.join(expdir, '/'.join([task, setting, ''])) - config_dir = os.path.join(expdir, '/'.join([task, setting, 'configs', ''])) + taskdir = os.path.join(expdir, '/'.join([task, exp_name, ''])) + config_dir = os.path.join(expdir, '/'.join([task, exp_name, 'configs', ''])) utils.makedir(config_dir) - # default setting + # default exp_name default_config_path = 'configs/default.yaml' with open(default_config_path, 'r') as stream: default_config = utils._ordered_load(stream) @@ -221,6 +233,10 @@ def generate_configs(expdir, task, setting): task_config['train_data'] = ''.join(["data/corpora/", task, "/train/"]) task_config['dev_data'] = ''.join(["data/corpora/", task, "/dev/"]) task_config['test_data'] = ''.join(["data/corpora/", task, "/dev/"]) + + # debug mode + set_debug_mode(task_config, args) + # bert task_config['bert_model'] = "data/bert/scibert_scivocab_cased" @@ -270,6 +286,18 @@ def generate_configs(expdir, task, setting): return +def main(arguments): + parser = argparse.ArgumentParser() + parser.add_argument('--experiment_dir', help='Directory for experiments', type=str, default='experiments') + parser.add_argument('--task_name', help='Name of task', type=str, default='cg') + 
parser.add_argument('--experiment_name', help='Name of this experiment', type=str, + default='basic') + parser.add_argument("--debug_mode", action='store_true', + help="Run experiments on a small data for debugging quickly") + args = parser.parse_args(arguments) + + generate_configs(args, args.experiment_dir, args.task_name, args.experiment_name) + if __name__ == '__main__': - # generate_configs("experiments/", "cg", "basic") - generate_configs(sys.argv[1], sys.argv[2], sys.argv[3]) + # generate_configs("experiments/", "cg", "debug_mode") + main(sys.argv[1:]) From a83ddaa1e5fac60e3663db49f6b527b4da9b8de8 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:12:28 +0900 Subject: [PATCH 09/70] fix bug --- README.md | 2 +- scripts/generate_configs.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0b525a1..d9a3eff 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ sh run/train/generate_configs.sh cg basic - Experiment name: basic, exp1, exp2, etc - Or running this debug mode (on a small data with several epochs) ```bash -sh run/train/generate_configs.sh cg debug +sh run/train/generate_configs-debug.sh cg debug ``` # 3. Predict (BioNLP tasks) diff --git a/scripts/generate_configs.py b/scripts/generate_configs.py index d77339c..5975fea 100644 --- a/scripts/generate_configs.py +++ b/scripts/generate_configs.py @@ -268,17 +268,21 @@ def generate_configs(args, expdir, task, exp_name): # predict config predict_dev_config = task_config.copy() + set_debug_mode(predict_dev_config, args) gen_predict_config(predict_dev_config, 'dev', config_dir, taskdir) predict_test_config = task_config.copy() + set_debug_mode(predict_test_config, args) gen_predict_config(predict_test_config, 'test', config_dir, taskdir) # predict end-to-end config predict_e2e_dev_config = predict_dev_config.copy() + set_debug_mode(predict_e2e_dev_config, args) gen_predict_e2e_config(predict_e2e_dev_config, 'dev', config_dir, taskdir) predict_e2e_test_config = predict_test_config.copy() + set_debug_mode(predict_e2e_test_config, args) gen_predict_e2e_config(predict_e2e_test_config, 'test', config_dir, taskdir) print('Generate configs: Done!') From a1950b1c2c13befdf469f76a27a32ef09e86eced Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:17:04 +0900 Subject: [PATCH 10/70] fix bug --- scripts/generate_configs.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/generate_configs.py b/scripts/generate_configs.py index 5975fea..629a03c 100644 --- a/scripts/generate_configs.py +++ b/scripts/generate_configs.py @@ -151,6 +151,7 @@ def gen_predict_config(predict_config, eval_set, config_dir, taskdir): """For joint prediction""" predict_config['test_data'] = predict_config['test_data'].replace('dev', eval_set) + set_debug_mode(predict_config, args) predict_config['result_dir'] = ''.join([taskdir, 'predict-gold-', eval_set, '/']) predict_config['save_params'] = False predict_config['joint_model_dir'] = ''.join([taskdir, 'joint-gold/model/']) @@ -165,6 +166,7 @@ def gen_predict_e2e_config(predict_e2e_config, eval_set, config_dir, taskdir): """For joint end-to-end prediction""" predict_e2e_config['result_dir'] = ''.join([taskdir, 'predict-e2e-', eval_set, '/']) + set_debug_mode(predict_e2e_config, args) predict_e2e_config['joint_model_dir'] = ''.join([taskdir, 'joint-e2e/model/']) predict_e2e_config['params'] = ''.join([taskdir, 'joint-e2e/', predict_e2e_config['task_name'], '.param']) predict_e2e_config['ner_predict_all'] = True @@ 
-268,21 +270,17 @@ def generate_configs(args, expdir, task, exp_name): # predict config predict_dev_config = task_config.copy() - set_debug_mode(predict_dev_config, args) gen_predict_config(predict_dev_config, 'dev', config_dir, taskdir) predict_test_config = task_config.copy() - set_debug_mode(predict_test_config, args) gen_predict_config(predict_test_config, 'test', config_dir, taskdir) # predict end-to-end config predict_e2e_dev_config = predict_dev_config.copy() - set_debug_mode(predict_e2e_dev_config, args) gen_predict_e2e_config(predict_e2e_dev_config, 'dev', config_dir, taskdir) predict_e2e_test_config = predict_test_config.copy() - set_debug_mode(predict_e2e_test_config, args) gen_predict_e2e_config(predict_e2e_test_config, 'test', config_dir, taskdir) print('Generate configs: Done!') From d9c7bcbbe3d98652f929976778fb8d13c04224ec Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:18:12 +0900 Subject: [PATCH 11/70] fix bug --- scripts/generate_configs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/generate_configs.py b/scripts/generate_configs.py index 629a03c..4d41e80 100644 --- a/scripts/generate_configs.py +++ b/scripts/generate_configs.py @@ -147,7 +147,7 @@ def gen_joint_e2e_config(joint_e2e_config, task_config, config_dir, taskdir): write_config(os.path.join(config_dir, 'train-joint-e2e.yaml'), joint_e2e_config) -def gen_predict_config(predict_config, eval_set, config_dir, taskdir): +def gen_predict_config(args, predict_config, eval_set, config_dir, taskdir): """For joint prediction""" predict_config['test_data'] = predict_config['test_data'].replace('dev', eval_set) @@ -162,7 +162,7 @@ def gen_predict_config(predict_config, eval_set, config_dir, taskdir): write_config(os.path.join(config_dir, ''.join(['predict-gold-', eval_set, '.yaml'])), predict_config) -def gen_predict_e2e_config(predict_e2e_config, eval_set, config_dir, taskdir): +def gen_predict_e2e_config(args, predict_e2e_config, eval_set, config_dir, taskdir): """For joint end-to-end prediction""" predict_e2e_config['result_dir'] = ''.join([taskdir, 'predict-e2e-', eval_set, '/']) @@ -270,18 +270,18 @@ def generate_configs(args, expdir, task, exp_name): # predict config predict_dev_config = task_config.copy() - gen_predict_config(predict_dev_config, 'dev', config_dir, taskdir) + gen_predict_config(args, predict_dev_config, 'dev', config_dir, taskdir) predict_test_config = task_config.copy() - gen_predict_config(predict_test_config, 'test', config_dir, taskdir) + gen_predict_config(args, predict_test_config, 'test', config_dir, taskdir) # predict end-to-end config predict_e2e_dev_config = predict_dev_config.copy() - gen_predict_e2e_config(predict_e2e_dev_config, 'dev', config_dir, taskdir) + gen_predict_e2e_config(args, predict_e2e_dev_config, 'dev', config_dir, taskdir) predict_e2e_test_config = predict_test_config.copy() - gen_predict_e2e_config(predict_e2e_test_config, 'test', config_dir, taskdir) + gen_predict_e2e_config(args, predict_e2e_test_config, 'test', config_dir, taskdir) print('Generate configs: Done!') From f5ca6f671b2b57d1aaf12df55e518cd4c9d50ac1 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:21:30 +0900 Subject: [PATCH 12/70] fix bugs --- scripts/generate_configs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/generate_configs.py b/scripts/generate_configs.py index 4d41e80..130b1b5 100644 --- a/scripts/generate_configs.py +++ b/scripts/generate_configs.py @@ -207,7 +207,9 @@ def set_debug_mode(configs, args): 
configs['train_data'] = configs['train_data'].replace('train', "debug") if "dev_data" in configs: configs['dev_data'] = configs['dev_data'].replace('dev', "debug") + configs['dev_data'] = configs['dev_data'].replace('test', "debug") if "test_data" in configs: + configs['test_data'] = configs['test_data'].replace('dev', "debug") configs['test_data'] = configs['test_data'].replace('test', "debug") if "epoch" in configs: configs["epoch"] = 2 From 63e73b696bc1372acb3c914913ae8d0e3262318e Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:24:51 +0900 Subject: [PATCH 13/70] cg debug data --- data/corpora/cg/debug/PMID-1388088.a1 | 21 + data/corpora/cg/debug/PMID-1388088.a2 | 28 + data/corpora/cg/debug/PMID-1388088.ann | 64 + data/corpora/cg/debug/PMID-1388088.inv.map | 1100 ++++++++++++ data/corpora/cg/debug/PMID-1388088.map | 912 ++++++++++ data/corpora/cg/debug/PMID-1388088.txt | 5 + data/corpora/cg/debug/PMID-1388088.txt.ori | 2 + data/corpora/cg/debug/PMID-198130.a1 | 35 + data/corpora/cg/debug/PMID-198130.a2 | 39 + data/corpora/cg/debug/PMID-198130.ann | 98 ++ data/corpora/cg/debug/PMID-198130.inv.map | 1779 ++++++++++++++++++++ data/corpora/cg/debug/PMID-198130.map | 1513 +++++++++++++++++ data/corpora/cg/debug/PMID-198130.txt | 6 + data/corpora/cg/debug/PMID-198130.txt.ori | 2 + 14 files changed, 5604 insertions(+) create mode 100644 data/corpora/cg/debug/PMID-1388088.a1 create mode 100644 data/corpora/cg/debug/PMID-1388088.a2 create mode 100644 data/corpora/cg/debug/PMID-1388088.ann create mode 100644 data/corpora/cg/debug/PMID-1388088.inv.map create mode 100644 data/corpora/cg/debug/PMID-1388088.map create mode 100644 data/corpora/cg/debug/PMID-1388088.txt create mode 100644 data/corpora/cg/debug/PMID-1388088.txt.ori create mode 100644 data/corpora/cg/debug/PMID-198130.a1 create mode 100644 data/corpora/cg/debug/PMID-198130.a2 create mode 100644 data/corpora/cg/debug/PMID-198130.ann create mode 100644 data/corpora/cg/debug/PMID-198130.inv.map create mode 100644 data/corpora/cg/debug/PMID-198130.map create mode 100644 data/corpora/cg/debug/PMID-198130.txt create mode 100644 data/corpora/cg/debug/PMID-198130.txt.ori diff --git a/data/corpora/cg/debug/PMID-1388088.a1 b/data/corpora/cg/debug/PMID-1388088.a1 new file mode 100644 index 0000000..5472964 --- /dev/null +++ b/data/corpora/cg/debug/PMID-1388088.a1 @@ -0,0 +1,21 @@ +T1 Gene_or_gene_product 0 35 Interleukin - 1 receptor antagonist +T2 Cell 71 79 neuronal +T3 Organism 94 97 rat +T4 Gene_or_gene_product 100 115 Interleukin - 1 +T5 Gene_or_gene_product 118 124 IL - 1 +T6 Organ 144 149 brain +T7 Gene_or_gene_product 189 195 IL - 1 +T8 Cell 284 292 neuronal +T9 Organ 320 328 cerebral +T10 Multi-tissue_structure 348 363 cerebral artery +T11 Organism 425 429 rats +T12 Gene_or_gene_product 458 484 IL - 1 receptor antagonist +T13 Gene_or_gene_product 487 495 IL - 1ra +T14 Multi-tissue_structure 589 597 striatal +T15 Simple_chemical 613 617 NMDA +T16 Simple_chemical 639 669 cis - 2 , 4 - methanoglutamate +T17 Gene_or_gene_product 729 737 IL - 1ra +T18 Gene_or_gene_product 776 782 IL - 1 +T19 Organ 826 831 brain +T20 Gene_or_gene_product 864 870 IL - 1 +T21 Cell 943 951 neuronal diff --git a/data/corpora/cg/debug/PMID-1388088.a2 b/data/corpora/cg/debug/PMID-1388088.a2 new file mode 100644 index 0000000..539aed4 --- /dev/null +++ b/data/corpora/cg/debug/PMID-1388088.a2 @@ -0,0 +1,28 @@ +* Equiv T12 T13 +* Equiv T4 T5 +T22 Negative_regulation 36 44 inhibits +T23 Breakdown 80 86 damage +T24 Gene_expression 127 136 synthesis +T25 
Positive_regulation 153 163 stimulated +T26 Blood_vessel_development 248 266 neovascularization +T27 Death 293 298 death +T28 Negative_regulation 400 409 inhibited +T29 Planned_process 430 438 injected +T30 Planned_process 598 606 infusion +T31 Planned_process 712 721 injection +T32 Regulation 788 796 mediator +T33 Breakdown 832 838 damage +T34 Death 952 957 death +E1 Negative_regulation:T22 Theme:E2 Cause:T1 +E2 Breakdown:T23 Theme:T2 +E3 Gene_expression:T24 Theme:T4 +E4 Positive_regulation:T25 Theme:E3 +E5 Blood_vessel_development:T26 +E6 Death:T27 Theme:T8 +E7 Negative_regulation:T28 Theme:E6 +E8 Planned_process:T29 Instrument:T12 Theme:T11 +E9 Planned_process:T30 Instrument:T16 +E10 Planned_process:T31 Instrument:T17 +E11 Regulation:T32 Cause:T18 Theme:E12 +E12 Breakdown:T33 Theme:T19 +E13 Death:T34 Theme:T21 diff --git a/data/corpora/cg/debug/PMID-1388088.ann b/data/corpora/cg/debug/PMID-1388088.ann new file mode 100644 index 0000000..f8e5d6f --- /dev/null +++ b/data/corpora/cg/debug/PMID-1388088.ann @@ -0,0 +1,64 @@ +* Equiv T12 T13 +* Equiv T4 T5 +T1 Gene_or_gene_product 0 35 Interleukin - 1 receptor antagonist +T2 Cell 71 79 neuronal +T3 Organism 94 97 rat +T4 Gene_or_gene_product 100 115 Interleukin - 1 +T5 Gene_or_gene_product 118 124 IL - 1 +T6 Organ 144 149 brain +T7 Gene_or_gene_product 189 195 IL - 1 +T8 Cell 284 292 neuronal +T9 Organ 320 328 cerebral +T10 Multi-tissue_structure 348 363 cerebral artery +T11 Organism 425 429 rats +T12 Gene_or_gene_product 458 484 IL - 1 receptor antagonist +T13 Gene_or_gene_product 487 495 IL - 1ra +T14 Multi-tissue_structure 589 597 striatal +T15 Simple_chemical 613 617 NMDA +T16 Simple_chemical 639 669 cis - 2 , 4 - methanoglutamate +T17 Gene_or_gene_product 729 737 IL - 1ra +T18 Gene_or_gene_product 776 782 IL - 1 +T19 Organ 826 831 brain +T20 Gene_or_gene_product 864 870 IL - 1 +T21 Cell 943 951 neuronal +TR22 Negative_regulation 36 44 inhibits +TR23 Breakdown 80 86 damage +TR24 Gene_expression 127 136 synthesis +TR25 Positive_regulation 153 163 stimulated +TR26 Blood_vessel_development 248 266 neovascularization +TR27 Death 293 298 death +TR28 Negative_regulation 400 409 inhibited +TR29 Planned_process 430 438 injected +TR30 Planned_process 598 606 infusion +TR31 Planned_process 712 721 injection +TR32 Regulation 788 796 mediator +TR33 Breakdown 832 838 damage +TR34 Death 952 957 death +R1 Theme Arg1:TR22 Arg2:TR23 +R2 Theme Arg1:TR25 Arg2:TR24 +R3 Theme Arg1:TR33 Arg2:T19 +R4 Cause Arg1:TR32 Arg2:T18 +R5 Instrument Arg1:TR31 Arg2:T17 +R6 Cause Arg1:TR22 Arg2:T1 +R7 Instrument Arg1:TR30 Arg2:T16 +R8 Instrument Arg1:TR29 Arg2:T12 +R9 Theme Arg1:TR24 Arg2:T4 +R10 Theme Arg1:TR27 Arg2:T8 +R11 Theme Arg1:TR29 Arg2:T11 +R12 Theme Arg1:TR28 Arg2:TR27 +R13 Theme Arg1:TR23 Arg2:T2 +R14 Theme Arg1:TR32 Arg2:TR33 +R15 Theme Arg1:TR34 Arg2:T21 +E1 Negative_regulation:TR22 Theme:E2 Cause:T1 +E2 Breakdown:TR23 Theme:T2 +E3 Gene_expression:TR24 Theme:T4 +E4 Positive_regulation:TR25 Theme:E3 +E5 Blood_vessel_development:TR26 +E6 Death:TR27 Theme:T8 +E7 Negative_regulation:TR28 Theme:E6 +E8 Planned_process:TR29 Instrument:T12 Theme:T11 +E9 Planned_process:TR30 Instrument:T16 +E10 Planned_process:TR31 Instrument:T17 +E11 Regulation:TR32 Cause:T18 Theme:E12 +E12 Breakdown:TR33 Theme:T19 +E13 Death:TR34 Theme:T21 diff --git a/data/corpora/cg/debug/PMID-1388088.inv.map b/data/corpora/cg/debug/PMID-1388088.inv.map new file mode 100644 index 0000000..33ee54c --- /dev/null +++ b/data/corpora/cg/debug/PMID-1388088.inv.map @@ -0,0 +1,1100 @@ +{ + "0": 0, + "1": 
1, + "2": 2, + "3": 3, + "4": 4, + "5": 5, + "6": 6, + "7": 7, + "8": 8, + "9": 9, + "10": 10, + "11": 11, + "12": 11, + "13": 12, + "14": 12, + "15": 13, + "16": 14, + "17": 15, + "18": 16, + "19": 17, + "20": 18, + "21": 19, + "22": 20, + "23": 21, + "24": 22, + "25": 23, + "26": 24, + "27": 25, + "28": 26, + "29": 27, + "30": 28, + "31": 29, + "32": 30, + "33": 31, + "34": 32, + "35": 33, + "36": 34, + "37": 35, + "38": 36, + "39": 37, + "40": 38, + "41": 39, + "42": 40, + "43": 41, + "44": 42, + "45": 43, + "46": 44, + "47": 45, + "48": 46, + "49": 47, + "50": 48, + "51": 49, + "52": 50, + "53": 51, + "54": 52, + "55": 53, + "56": 54, + "57": 55, + "58": 56, + "59": 57, + "60": 58, + "61": 59, + "62": 60, + "63": 61, + "64": 62, + "65": 63, + "66": 64, + "67": 65, + "68": 66, + "69": 67, + "70": 68, + "71": 69, + "72": 70, + "73": 71, + "74": 72, + "75": 73, + "76": 74, + "77": 75, + "78": 76, + "79": 77, + "80": 78, + "81": 79, + "82": 80, + "83": 81, + "84": 82, + "85": 83, + "86": 84, + "87": 85, + "88": 86, + "89": 87, + "90": 88, + "91": 89, + "92": 90, + "93": 91, + "94": 92, + "95": 93, + "96": 94, + "97": 95, + "98": 95, + "99": 96, + "100": 97, + "101": 98, + "102": 99, + "103": 100, + "104": 101, + "105": 102, + "106": 103, + "107": 104, + "108": 105, + "109": 106, + "110": 107, + "111": 108, + "112": 108, + "113": 109, + "114": 109, + "115": 110, + "116": 111, + "117": 112, + "118": 112, + "119": 113, + "120": 114, + "121": 114, + "122": 115, + "123": 115, + "124": 116, + "125": 116, + "126": 117, + "127": 118, + "128": 119, + "129": 120, + "130": 121, + "131": 122, + "132": 123, + "133": 124, + "134": 125, + "135": 126, + "136": 127, + "137": 128, + "138": 129, + "139": 130, + "140": 131, + "141": 132, + "142": 133, + "143": 134, + "144": 135, + "145": 136, + "146": 137, + "147": 138, + "148": 139, + "149": 140, + "150": 141, + "151": 142, + "152": 143, + "153": 144, + "154": 145, + "155": 146, + "156": 147, + "157": 148, + "158": 149, + "159": 150, + "160": 151, + "161": 152, + "162": 153, + "163": 154, + "164": 155, + "165": 156, + "166": 157, + "167": 158, + "168": 159, + "169": 160, + "170": 161, + "171": 162, + "172": 163, + "173": 164, + "174": 165, + "175": 166, + "176": 167, + "177": 168, + "178": 169, + "179": 170, + "180": 171, + "181": 172, + "182": 173, + "183": 174, + "184": 175, + "185": 176, + "186": 177, + "187": 178, + "188": 179, + "189": 180, + "190": 181, + "191": 182, + "192": 182, + "193": 183, + "194": 183, + "195": 184, + "196": 185, + "197": 186, + "198": 187, + "199": 188, + "200": 189, + "201": 190, + "202": 191, + "203": 192, + "204": 193, + "205": 194, + "206": 195, + "207": 196, + "208": 197, + "209": 198, + "210": 199, + "211": 200, + "212": 201, + "213": 202, + "214": 203, + "215": 204, + "216": 205, + "217": 206, + "218": 207, + "219": 208, + "220": 209, + "221": 210, + "222": 211, + "223": 212, + "224": 213, + "225": 214, + "226": 214, + "227": 215, + "228": 216, + "229": 217, + "230": 218, + "231": 219, + "232": 220, + "233": 221, + "234": 222, + "235": 223, + "236": 224, + "237": 225, + "238": 226, + "239": 227, + "240": 228, + "241": 229, + "242": 230, + "243": 231, + "244": 232, + "245": 233, + "246": 234, + "247": 235, + "248": 236, + "249": 237, + "250": 238, + "251": 239, + "252": 240, + "253": 241, + "254": 242, + "255": 243, + "256": 244, + "257": 245, + "258": 246, + "259": 247, + "260": 248, + "261": 249, + "262": 250, + "263": 251, + "264": 252, + "265": 253, + "266": 254, + "267": 254, + "268": 255, + "269": 256, + "270": 257, 
+ "271": 258, + "272": 259, + "273": 260, + "274": 261, + "275": 262, + "276": 263, + "277": 264, + "278": 265, + "279": 266, + "280": 267, + "281": 268, + "282": 269, + "283": 270, + "284": 271, + "285": 272, + "286": 273, + "287": 274, + "288": 275, + "289": 276, + "290": 277, + "291": 278, + "292": 279, + "293": 280, + "294": 281, + "295": 282, + "296": 283, + "297": 284, + "298": 285, + "299": 286, + "300": 287, + "301": 288, + "302": 289, + "303": 290, + "304": 291, + "305": 292, + "306": 293, + "307": 294, + "308": 295, + "309": 296, + "310": 297, + "311": 298, + "312": 299, + "313": 300, + "314": 301, + "315": 302, + "316": 303, + "317": 304, + "318": 305, + "319": 306, + "320": 307, + "321": 308, + "322": 309, + "323": 310, + "324": 311, + "325": 312, + "326": 313, + "327": 314, + "328": 315, + "329": 316, + "330": 317, + "331": 318, + "332": 319, + "333": 320, + "334": 321, + "335": 322, + "336": 323, + "337": 324, + "338": 325, + "339": 326, + "340": 327, + "341": 327, + "342": 328, + "343": 329, + "344": 330, + "345": 331, + "346": 332, + "347": 333, + "348": 334, + "349": 335, + "350": 336, + "351": 337, + "352": 338, + "353": 339, + "354": 340, + "355": 341, + "356": 342, + "357": 343, + "358": 344, + "359": 345, + "360": 346, + "361": 347, + "362": 348, + "363": 349, + "364": 350, + "365": 351, + "366": 352, + "367": 353, + "368": 354, + "369": 355, + "370": 356, + "371": 357, + "372": 358, + "373": 359, + "374": 359, + "375": 360, + "376": 361, + "377": 362, + "378": 363, + "379": 364, + "380": 365, + "381": 365, + "382": 366, + "383": 367, + "384": 368, + "385": 369, + "386": 370, + "387": 371, + "388": 372, + "389": 373, + "390": 374, + "391": 375, + "392": 376, + "393": 377, + "394": 378, + "395": 379, + "396": 380, + "397": 381, + "398": 382, + "399": 383, + "400": 384, + "401": 385, + "402": 386, + "403": 387, + "404": 388, + "405": 389, + "406": 390, + "407": 391, + "408": 392, + "409": 393, + "410": 394, + "411": 395, + "412": 395, + "413": 396, + "414": 397, + "415": 398, + "416": 399, + "417": 400, + "418": 400, + "419": 401, + "420": 401, + "421": 402, + "422": 403, + "423": 404, + "424": 405, + "425": 406, + "426": 407, + "427": 408, + "428": 409, + "429": 410, + "430": 411, + "431": 412, + "432": 413, + "433": 414, + "434": 415, + "435": 416, + "436": 417, + "437": 418, + "438": 419, + "439": 420, + "440": 421, + "441": 422, + "442": 423, + "443": 424, + "444": 425, + "445": 426, + "446": 427, + "447": 428, + "448": 429, + "449": 430, + "450": 431, + "451": 432, + "452": 433, + "453": 434, + "454": 435, + "455": 436, + "456": 437, + "457": 438, + "458": 439, + "459": 440, + "460": 441, + "461": 441, + "462": 442, + "463": 442, + "464": 443, + "465": 444, + "466": 445, + "467": 446, + "468": 447, + "469": 448, + "470": 449, + "471": 450, + "472": 451, + "473": 452, + "474": 453, + "475": 454, + "476": 455, + "477": 456, + "478": 457, + "479": 458, + "480": 459, + "481": 460, + "482": 461, + "483": 462, + "484": 463, + "485": 464, + "486": 465, + "487": 465, + "488": 466, + "489": 467, + "490": 467, + "491": 468, + "492": 468, + "493": 469, + "494": 470, + "495": 471, + "496": 471, + "497": 472, + "498": 473, + "499": 474, + "500": 475, + "501": 476, + "502": 477, + "503": 478, + "504": 479, + "505": 480, + "506": 481, + "507": 482, + "508": 483, + "509": 484, + "510": 485, + "511": 486, + "512": 486, + "513": 487, + "514": 488, + "515": 489, + "516": 490, + "517": 491, + "518": 492, + "519": 493, + "520": 494, + "521": 495, + "522": 496, + "523": 497, + "524": 
498, + "525": 499, + "526": 500, + "527": 501, + "528": 502, + "529": 503, + "530": 504, + "531": 505, + "532": 506, + "533": 507, + "534": 508, + "535": 509, + "536": 510, + "537": 511, + "538": 512, + "539": 513, + "540": 514, + "541": 515, + "542": 516, + "543": 517, + "544": 518, + "545": 519, + "546": 520, + "547": 521, + "548": 522, + "549": 523, + "550": 524, + "551": 525, + "552": 526, + "553": 527, + "554": 528, + "555": 529, + "556": 530, + "557": 531, + "558": 532, + "559": 532, + "560": 533, + "561": 533, + "562": 534, + "563": 535, + "564": 536, + "565": 537, + "566": 538, + "567": 539, + "568": 540, + "569": 541, + "570": 542, + "571": 543, + "572": 544, + "573": 545, + "574": 546, + "575": 547, + "576": 548, + "577": 549, + "578": 550, + "579": 551, + "580": 552, + "581": 553, + "582": 554, + "583": 555, + "584": 556, + "585": 557, + "586": 558, + "587": 559, + "588": 560, + "589": 561, + "590": 562, + "591": 563, + "592": 564, + "593": 565, + "594": 566, + "595": 567, + "596": 568, + "597": 569, + "598": 570, + "599": 571, + "600": 572, + "601": 573, + "602": 574, + "603": 575, + "604": 576, + "605": 577, + "606": 578, + "607": 579, + "608": 580, + "609": 581, + "610": 582, + "611": 583, + "612": 584, + "613": 585, + "614": 586, + "615": 587, + "616": 588, + "617": 589, + "618": 589, + "619": 590, + "620": 590, + "621": 591, + "622": 592, + "623": 593, + "624": 594, + "625": 595, + "626": 596, + "627": 597, + "628": 598, + "629": 599, + "630": 600, + "631": 601, + "632": 602, + "633": 603, + "634": 604, + "635": 605, + "636": 606, + "637": 607, + "638": 608, + "639": 608, + "640": 609, + "641": 610, + "642": 611, + "643": 611, + "644": 612, + "645": 612, + "646": 613, + "647": 613, + "648": 614, + "649": 614, + "650": 615, + "651": 615, + "652": 616, + "653": 616, + "654": 617, + "655": 618, + "656": 619, + "657": 620, + "658": 621, + "659": 622, + "660": 623, + "661": 624, + "662": 625, + "663": 626, + "664": 627, + "665": 628, + "666": 629, + "667": 630, + "668": 631, + "669": 632, + "670": 632, + "671": 633, + "672": 634, + "673": 635, + "674": 636, + "675": 637, + "676": 638, + "677": 639, + "678": 640, + "679": 641, + "680": 642, + "681": 643, + "682": 644, + "683": 645, + "684": 646, + "685": 647, + "686": 648, + "687": 649, + "688": 650, + "689": 651, + "690": 652, + "691": 653, + "692": 654, + "693": 655, + "694": 656, + "695": 657, + "696": 658, + "697": 659, + "698": 660, + "699": 661, + "700": 662, + "701": 663, + "702": 663, + "703": 664, + "704": 665, + "705": 665, + "706": 666, + "707": 666, + "708": 667, + "709": 668, + "710": 669, + "711": 670, + "712": 671, + "713": 672, + "714": 673, + "715": 674, + "716": 675, + "717": 676, + "718": 677, + "719": 678, + "720": 679, + "721": 680, + "722": 681, + "723": 682, + "724": 683, + "725": 684, + "726": 685, + "727": 686, + "728": 687, + "729": 688, + "730": 689, + "731": 690, + "732": 690, + "733": 691, + "734": 691, + "735": 692, + "736": 693, + "737": 694, + "738": 694, + "739": 695, + "740": 696, + "741": 697, + "742": 698, + "743": 699, + "744": 700, + "745": 701, + "746": 702, + "747": 703, + "748": 704, + "749": 705, + "750": 706, + "751": 707, + "752": 708, + "753": 709, + "754": 710, + "755": 711, + "756": 712, + "757": 713, + "758": 714, + "759": 715, + "760": 716, + "761": 717, + "762": 718, + "763": 719, + "764": 720, + "765": 721, + "766": 722, + "767": 723, + "768": 724, + "769": 725, + "770": 726, + "771": 727, + "772": 728, + "773": 729, + "774": 730, + "775": 731, + "776": 732, + "777": 733, + 
"778": 734, + "779": 734, + "780": 735, + "781": 735, + "782": 736, + "783": 737, + "784": 738, + "785": 739, + "786": 740, + "787": 741, + "788": 742, + "789": 743, + "790": 744, + "791": 745, + "792": 746, + "793": 747, + "794": 748, + "795": 749, + "796": 750, + "797": 751, + "798": 752, + "799": 753, + "800": 754, + "801": 755, + "802": 756, + "803": 757, + "804": 758, + "805": 759, + "806": 760, + "807": 761, + "808": 762, + "809": 763, + "810": 764, + "811": 765, + "812": 766, + "813": 767, + "814": 768, + "815": 769, + "816": 770, + "817": 771, + "818": 772, + "819": 773, + "820": 774, + "821": 775, + "822": 776, + "823": 777, + "824": 778, + "825": 779, + "826": 780, + "827": 781, + "828": 782, + "829": 783, + "830": 784, + "831": 785, + "832": 786, + "833": 787, + "834": 788, + "835": 789, + "836": 790, + "837": 791, + "838": 792, + "839": 792, + "840": 793, + "841": 794, + "842": 795, + "843": 796, + "844": 797, + "845": 798, + "846": 799, + "847": 800, + "848": 801, + "849": 802, + "850": 803, + "851": 804, + "852": 805, + "853": 806, + "854": 807, + "855": 808, + "856": 809, + "857": 810, + "858": 811, + "859": 812, + "860": 813, + "861": 814, + "862": 815, + "863": 816, + "864": 817, + "865": 818, + "866": 819, + "867": 819, + "868": 820, + "869": 820, + "870": 821, + "871": 822, + "872": 823, + "873": 824, + "874": 825, + "875": 826, + "876": 827, + "877": 828, + "878": 829, + "879": 830, + "880": 831, + "881": 832, + "882": 833, + "883": 834, + "884": 835, + "885": 836, + "886": 837, + "887": 838, + "888": 839, + "889": 840, + "890": 841, + "891": 842, + "892": 843, + "893": 844, + "894": 845, + "895": 846, + "896": 847, + "897": 848, + "898": 849, + "899": 850, + "900": 851, + "901": 852, + "902": 853, + "903": 854, + "904": 855, + "905": 856, + "906": 857, + "907": 858, + "908": 859, + "909": 860, + "910": 861, + "911": 862, + "912": 863, + "913": 864, + "914": 865, + "915": 866, + "916": 867, + "917": 868, + "918": 869, + "919": 870, + "920": 871, + "921": 872, + "922": 873, + "923": 874, + "924": 875, + "925": 876, + "926": 877, + "927": 878, + "928": 879, + "929": 880, + "930": 881, + "931": 882, + "932": 883, + "933": 884, + "934": 885, + "935": 886, + "936": 887, + "937": 888, + "938": 889, + "939": 890, + "940": 891, + "941": 892, + "942": 893, + "943": 894, + "944": 895, + "945": 896, + "946": 897, + "947": 898, + "948": 899, + "949": 900, + "950": 901, + "951": 902, + "952": 903, + "953": 904, + "954": 905, + "955": 906, + "956": 907, + "957": 908, + "958": 908, + "959": 909, + "entities": { + "T1": [ + 0, + 33 + ], + "T2": [ + 69, + 77 + ], + "T3": [ + 92, + 95 + ], + "T4": [ + 97, + 110 + ], + "T5": [ + 112, + 116 + ], + "T6": [ + 135, + 140 + ], + "T7": [ + 180, + 184 + ], + "T8": [ + 271, + 279 + ], + "T9": [ + 307, + 315 + ], + "T10": [ + 334, + 349 + ], + "T11": [ + 406, + 410 + ], + "T12": [ + 439, + 463 + ], + "T13": [ + 465, + 471 + ], + "T14": [ + 561, + 569 + ], + "T15": [ + 585, + 589 + ], + "T16": [ + 608, + 632 + ], + "T17": [ + 688, + 694 + ], + "T18": [ + 732, + 736 + ], + "T19": [ + 780, + 785 + ], + "T20": [ + 817, + 821 + ], + "T21": [ + 894, + 902 + ], + "T22": [ + 34, + 42 + ], + "T23": [ + 78, + 84 + ], + "T24": [ + 118, + 127 + ], + "T25": [ + 144, + 154 + ], + "T26": [ + 236, + 254 + ], + "T27": [ + 280, + 285 + ], + "T28": [ + 384, + 393 + ], + "T29": [ + 411, + 419 + ], + "T30": [ + 570, + 578 + ], + "T31": [ + 671, + 680 + ], + "T32": [ + 742, + 750 + ], + "T33": [ + 786, + 792 + ], + "T34": [ + 903, + 908 + ] + } +} \ No newline at end 
of file diff --git a/data/corpora/cg/debug/PMID-1388088.map b/data/corpora/cg/debug/PMID-1388088.map new file mode 100644 index 0000000..3c4e4ae --- /dev/null +++ b/data/corpora/cg/debug/PMID-1388088.map @@ -0,0 +1,912 @@ +{ + "0": 0, + "1": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5, + "6": 6, + "7": 7, + "8": 8, + "9": 9, + "10": 10, + "11": 12, + "12": 14, + "13": 15, + "14": 16, + "15": 17, + "16": 18, + "17": 19, + "18": 20, + "19": 21, + "20": 22, + "21": 23, + "22": 24, + "23": 25, + "24": 26, + "25": 27, + "26": 28, + "27": 29, + "28": 30, + "29": 31, + "30": 32, + "31": 33, + "32": 34, + "33": 35, + "34": 36, + "35": 37, + "36": 38, + "37": 39, + "38": 40, + "39": 41, + "40": 42, + "41": 43, + "42": 44, + "43": 45, + "44": 46, + "45": 47, + "46": 48, + "47": 49, + "48": 50, + "49": 51, + "50": 52, + "51": 53, + "52": 54, + "53": 55, + "54": 56, + "55": 57, + "56": 58, + "57": 59, + "58": 60, + "59": 61, + "60": 62, + "61": 63, + "62": 64, + "63": 65, + "64": 66, + "65": 67, + "66": 68, + "67": 69, + "68": 70, + "69": 71, + "70": 72, + "71": 73, + "72": 74, + "73": 75, + "74": 76, + "75": 77, + "76": 78, + "77": 79, + "78": 80, + "79": 81, + "80": 82, + "81": 83, + "82": 84, + "83": 85, + "84": 86, + "85": 87, + "86": 88, + "87": 89, + "88": 90, + "89": 91, + "90": 92, + "91": 93, + "92": 94, + "93": 95, + "94": 96, + "95": 98, + "96": 99, + "97": 100, + "98": 101, + "99": 102, + "100": 103, + "101": 104, + "102": 105, + "103": 106, + "104": 107, + "105": 108, + "106": 109, + "107": 110, + "108": 112, + "109": 114, + "110": 115, + "111": 116, + "112": 118, + "113": 119, + "114": 121, + "115": 123, + "116": 125, + "117": 126, + "118": 127, + "119": 128, + "120": 129, + "121": 130, + "122": 131, + "123": 132, + "124": 133, + "125": 134, + "126": 135, + "127": 136, + "128": 137, + "129": 138, + "130": 139, + "131": 140, + "132": 141, + "133": 142, + "134": 143, + "135": 144, + "136": 145, + "137": 146, + "138": 147, + "139": 148, + "140": 149, + "141": 150, + "142": 151, + "143": 152, + "144": 153, + "145": 154, + "146": 155, + "147": 156, + "148": 157, + "149": 158, + "150": 159, + "151": 160, + "152": 161, + "153": 162, + "154": 163, + "155": 164, + "156": 165, + "157": 166, + "158": 167, + "159": 168, + "160": 169, + "161": 170, + "162": 171, + "163": 172, + "164": 173, + "165": 174, + "166": 175, + "167": 176, + "168": 177, + "169": 178, + "170": 179, + "171": 180, + "172": 181, + "173": 182, + "174": 183, + "175": 184, + "176": 185, + "177": 186, + "178": 187, + "179": 188, + "180": 189, + "181": 190, + "182": 192, + "183": 194, + "184": 195, + "185": 196, + "186": 197, + "187": 198, + "188": 199, + "189": 200, + "190": 201, + "191": 202, + "192": 203, + "193": 204, + "194": 205, + "195": 206, + "196": 207, + "197": 208, + "198": 209, + "199": 210, + "200": 211, + "201": 212, + "202": 213, + "203": 214, + "204": 215, + "205": 216, + "206": 217, + "207": 218, + "208": 219, + "209": 220, + "210": 221, + "211": 222, + "212": 223, + "213": 224, + "214": 226, + "215": 227, + "216": 228, + "217": 229, + "218": 230, + "219": 231, + "220": 232, + "221": 233, + "222": 234, + "223": 235, + "224": 236, + "225": 237, + "226": 238, + "227": 239, + "228": 240, + "229": 241, + "230": 242, + "231": 243, + "232": 244, + "233": 245, + "234": 246, + "235": 247, + "236": 248, + "237": 249, + "238": 250, + "239": 251, + "240": 252, + "241": 253, + "242": 254, + "243": 255, + "244": 256, + "245": 257, + "246": 258, + "247": 259, + "248": 260, + "249": 261, + "250": 262, + "251": 263, + "252": 264, + 
"253": 265, + "254": 267, + "255": 268, + "256": 269, + "257": 270, + "258": 271, + "259": 272, + "260": 273, + "261": 274, + "262": 275, + "263": 276, + "264": 277, + "265": 278, + "266": 279, + "267": 280, + "268": 281, + "269": 282, + "270": 283, + "271": 284, + "272": 285, + "273": 286, + "274": 287, + "275": 288, + "276": 289, + "277": 290, + "278": 291, + "279": 292, + "280": 293, + "281": 294, + "282": 295, + "283": 296, + "284": 297, + "285": 298, + "286": 299, + "287": 300, + "288": 301, + "289": 302, + "290": 303, + "291": 304, + "292": 305, + "293": 306, + "294": 307, + "295": 308, + "296": 309, + "297": 310, + "298": 311, + "299": 312, + "300": 313, + "301": 314, + "302": 315, + "303": 316, + "304": 317, + "305": 318, + "306": 319, + "307": 320, + "308": 321, + "309": 322, + "310": 323, + "311": 324, + "312": 325, + "313": 326, + "314": 327, + "315": 328, + "316": 329, + "317": 330, + "318": 331, + "319": 332, + "320": 333, + "321": 334, + "322": 335, + "323": 336, + "324": 337, + "325": 338, + "326": 339, + "327": 341, + "328": 342, + "329": 343, + "330": 344, + "331": 345, + "332": 346, + "333": 347, + "334": 348, + "335": 349, + "336": 350, + "337": 351, + "338": 352, + "339": 353, + "340": 354, + "341": 355, + "342": 356, + "343": 357, + "344": 358, + "345": 359, + "346": 360, + "347": 361, + "348": 362, + "349": 363, + "350": 364, + "351": 365, + "352": 366, + "353": 367, + "354": 368, + "355": 369, + "356": 370, + "357": 371, + "358": 372, + "359": 374, + "360": 375, + "361": 376, + "362": 377, + "363": 378, + "364": 379, + "365": 381, + "366": 382, + "367": 383, + "368": 384, + "369": 385, + "370": 386, + "371": 387, + "372": 388, + "373": 389, + "374": 390, + "375": 391, + "376": 392, + "377": 393, + "378": 394, + "379": 395, + "380": 396, + "381": 397, + "382": 398, + "383": 399, + "384": 400, + "385": 401, + "386": 402, + "387": 403, + "388": 404, + "389": 405, + "390": 406, + "391": 407, + "392": 408, + "393": 409, + "394": 410, + "395": 412, + "396": 413, + "397": 414, + "398": 415, + "399": 416, + "400": 418, + "401": 420, + "402": 421, + "403": 422, + "404": 423, + "405": 424, + "406": 425, + "407": 426, + "408": 427, + "409": 428, + "410": 429, + "411": 430, + "412": 431, + "413": 432, + "414": 433, + "415": 434, + "416": 435, + "417": 436, + "418": 437, + "419": 438, + "420": 439, + "421": 440, + "422": 441, + "423": 442, + "424": 443, + "425": 444, + "426": 445, + "427": 446, + "428": 447, + "429": 448, + "430": 449, + "431": 450, + "432": 451, + "433": 452, + "434": 453, + "435": 454, + "436": 455, + "437": 456, + "438": 457, + "439": 458, + "440": 459, + "441": 461, + "442": 463, + "443": 464, + "444": 465, + "445": 466, + "446": 467, + "447": 468, + "448": 469, + "449": 470, + "450": 471, + "451": 472, + "452": 473, + "453": 474, + "454": 475, + "455": 476, + "456": 477, + "457": 478, + "458": 479, + "459": 480, + "460": 481, + "461": 482, + "462": 483, + "463": 484, + "464": 485, + "465": 487, + "466": 488, + "467": 490, + "468": 492, + "469": 493, + "470": 494, + "471": 496, + "472": 497, + "473": 498, + "474": 499, + "475": 500, + "476": 501, + "477": 502, + "478": 503, + "479": 504, + "480": 505, + "481": 506, + "482": 507, + "483": 508, + "484": 509, + "485": 510, + "486": 512, + "487": 513, + "488": 514, + "489": 515, + "490": 516, + "491": 517, + "492": 518, + "493": 519, + "494": 520, + "495": 521, + "496": 522, + "497": 523, + "498": 524, + "499": 525, + "500": 526, + "501": 527, + "502": 528, + "503": 529, + "504": 530, + "505": 531, + "506": 532, 
+ "507": 533, + "508": 534, + "509": 535, + "510": 536, + "511": 537, + "512": 538, + "513": 539, + "514": 540, + "515": 541, + "516": 542, + "517": 543, + "518": 544, + "519": 545, + "520": 546, + "521": 547, + "522": 548, + "523": 549, + "524": 550, + "525": 551, + "526": 552, + "527": 553, + "528": 554, + "529": 555, + "530": 556, + "531": 557, + "532": 559, + "533": 561, + "534": 562, + "535": 563, + "536": 564, + "537": 565, + "538": 566, + "539": 567, + "540": 568, + "541": 569, + "542": 570, + "543": 571, + "544": 572, + "545": 573, + "546": 574, + "547": 575, + "548": 576, + "549": 577, + "550": 578, + "551": 579, + "552": 580, + "553": 581, + "554": 582, + "555": 583, + "556": 584, + "557": 585, + "558": 586, + "559": 587, + "560": 588, + "561": 589, + "562": 590, + "563": 591, + "564": 592, + "565": 593, + "566": 594, + "567": 595, + "568": 596, + "569": 597, + "570": 598, + "571": 599, + "572": 600, + "573": 601, + "574": 602, + "575": 603, + "576": 604, + "577": 605, + "578": 606, + "579": 607, + "580": 608, + "581": 609, + "582": 610, + "583": 611, + "584": 612, + "585": 613, + "586": 614, + "587": 615, + "588": 616, + "589": 618, + "590": 620, + "591": 621, + "592": 622, + "593": 623, + "594": 624, + "595": 625, + "596": 626, + "597": 627, + "598": 628, + "599": 629, + "600": 630, + "601": 631, + "602": 632, + "603": 633, + "604": 634, + "605": 635, + "606": 636, + "607": 637, + "608": 639, + "609": 640, + "610": 641, + "611": 643, + "612": 645, + "613": 647, + "614": 649, + "615": 651, + "616": 653, + "617": 654, + "618": 655, + "619": 656, + "620": 657, + "621": 658, + "622": 659, + "623": 660, + "624": 661, + "625": 662, + "626": 663, + "627": 664, + "628": 665, + "629": 666, + "630": 667, + "631": 668, + "632": 670, + "633": 671, + "634": 672, + "635": 673, + "636": 674, + "637": 675, + "638": 676, + "639": 677, + "640": 678, + "641": 679, + "642": 680, + "643": 681, + "644": 682, + "645": 683, + "646": 684, + "647": 685, + "648": 686, + "649": 687, + "650": 688, + "651": 689, + "652": 690, + "653": 691, + "654": 692, + "655": 693, + "656": 694, + "657": 695, + "658": 696, + "659": 697, + "660": 698, + "661": 699, + "662": 700, + "663": 702, + "664": 703, + "665": 705, + "666": 707, + "667": 708, + "668": 709, + "669": 710, + "670": 711, + "671": 712, + "672": 713, + "673": 714, + "674": 715, + "675": 716, + "676": 717, + "677": 718, + "678": 719, + "679": 720, + "680": 721, + "681": 722, + "682": 723, + "683": 724, + "684": 725, + "685": 726, + "686": 727, + "687": 728, + "688": 729, + "689": 730, + "690": 732, + "691": 734, + "692": 735, + "693": 736, + "694": 738, + "695": 739, + "696": 740, + "697": 741, + "698": 742, + "699": 743, + "700": 744, + "701": 745, + "702": 746, + "703": 747, + "704": 748, + "705": 749, + "706": 750, + "707": 751, + "708": 752, + "709": 753, + "710": 754, + "711": 755, + "712": 756, + "713": 757, + "714": 758, + "715": 759, + "716": 760, + "717": 761, + "718": 762, + "719": 763, + "720": 764, + "721": 765, + "722": 766, + "723": 767, + "724": 768, + "725": 769, + "726": 770, + "727": 771, + "728": 772, + "729": 773, + "730": 774, + "731": 775, + "732": 776, + "733": 777, + "734": 779, + "735": 781, + "736": 782, + "737": 783, + "738": 784, + "739": 785, + "740": 786, + "741": 787, + "742": 788, + "743": 789, + "744": 790, + "745": 791, + "746": 792, + "747": 793, + "748": 794, + "749": 795, + "750": 796, + "751": 797, + "752": 798, + "753": 799, + "754": 800, + "755": 801, + "756": 802, + "757": 803, + "758": 804, + "759": 805, + "760": 
806, + "761": 807, + "762": 808, + "763": 809, + "764": 810, + "765": 811, + "766": 812, + "767": 813, + "768": 814, + "769": 815, + "770": 816, + "771": 817, + "772": 818, + "773": 819, + "774": 820, + "775": 821, + "776": 822, + "777": 823, + "778": 824, + "779": 825, + "780": 826, + "781": 827, + "782": 828, + "783": 829, + "784": 830, + "785": 831, + "786": 832, + "787": 833, + "788": 834, + "789": 835, + "790": 836, + "791": 837, + "792": 839, + "793": 840, + "794": 841, + "795": 842, + "796": 843, + "797": 844, + "798": 845, + "799": 846, + "800": 847, + "801": 848, + "802": 849, + "803": 850, + "804": 851, + "805": 852, + "806": 853, + "807": 854, + "808": 855, + "809": 856, + "810": 857, + "811": 858, + "812": 859, + "813": 860, + "814": 861, + "815": 862, + "816": 863, + "817": 864, + "818": 865, + "819": 867, + "820": 869, + "821": 870, + "822": 871, + "823": 872, + "824": 873, + "825": 874, + "826": 875, + "827": 876, + "828": 877, + "829": 878, + "830": 879, + "831": 880, + "832": 881, + "833": 882, + "834": 883, + "835": 884, + "836": 885, + "837": 886, + "838": 887, + "839": 888, + "840": 889, + "841": 890, + "842": 891, + "843": 892, + "844": 893, + "845": 894, + "846": 895, + "847": 896, + "848": 897, + "849": 898, + "850": 899, + "851": 900, + "852": 901, + "853": 902, + "854": 903, + "855": 904, + "856": 905, + "857": 906, + "858": 907, + "859": 908, + "860": 909, + "861": 910, + "862": 911, + "863": 912, + "864": 913, + "865": 914, + "866": 915, + "867": 916, + "868": 917, + "869": 918, + "870": 919, + "871": 920, + "872": 921, + "873": 922, + "874": 923, + "875": 924, + "876": 925, + "877": 926, + "878": 927, + "879": 928, + "880": 929, + "881": 930, + "882": 931, + "883": 932, + "884": 933, + "885": 934, + "886": 935, + "887": 936, + "888": 937, + "889": 938, + "890": 939, + "891": 940, + "892": 941, + "893": 942, + "894": 943, + "895": 944, + "896": 945, + "897": 946, + "898": 947, + "899": 948, + "900": 949, + "901": 950, + "902": 951, + "903": 952, + "904": 953, + "905": 954, + "906": 955, + "907": 956, + "908": 958, + "909": 959 +} \ No newline at end of file diff --git a/data/corpora/cg/debug/PMID-1388088.txt b/data/corpora/cg/debug/PMID-1388088.txt new file mode 100644 index 0000000..784355b --- /dev/null +++ b/data/corpora/cg/debug/PMID-1388088.txt @@ -0,0 +1,5 @@ +Interleukin - 1 receptor antagonist inhibits ischaemic and excitotoxic neuronal damage in the rat . +Interleukin - 1 ( IL - 1 ) synthesis in the brain is stimulated by mechanical injury and IL - 1 mimics some effects of injury , such as gliosis and neovascularization . +We report that neuronal death resulting from focal cerebral ischaemia ( middle cerebral artery occlusion , 24 h ) is significantly inhibited ( by 50 % ) in rats injected with a recombinant IL - 1 receptor antagonist ( IL - 1ra , 10 micrograms , icv 30 min before and 10 min after ischaemia ) . +Excitotoxic damage due to striatal infusion of an NMDA - receptor agonist ( cis - 2 , 4 - methanoglutamate ) was also markedly inhibited ( 71 % ) by injection of the IL - 1ra . +These data indicate that endogenous IL - 1 is a mediator of ischaemic and excitotoxic brain damage , and that inhibitors of IL - 1 action may be of therapeutic value in the treatment of acute or chronic neuronal death . 
\ No newline at end of file diff --git a/data/corpora/cg/debug/PMID-1388088.txt.ori b/data/corpora/cg/debug/PMID-1388088.txt.ori new file mode 100644 index 0000000..e19d3f7 --- /dev/null +++ b/data/corpora/cg/debug/PMID-1388088.txt.ori @@ -0,0 +1,2 @@ +Interleukin-1 receptor antagonist inhibits ischaemic and excitotoxic neuronal damage in the rat. +Interleukin-1 (IL-1) synthesis in the brain is stimulated by mechanical injury and IL-1 mimics some effects of injury, such as gliosis and neovascularization. We report that neuronal death resulting from focal cerebral ischaemia (middle cerebral artery occlusion, 24 h) is significantly inhibited (by 50%) in rats injected with a recombinant IL-1 receptor antagonist (IL-1ra, 10 micrograms, icv 30 min before and 10 min after ischaemia). Excitotoxic damage due to striatal infusion of an NMDA-receptor agonist (cis-2,4-methanoglutamate) was also markedly inhibited (71%) by injection of the IL-1ra. These data indicate that endogenous IL-1 is a mediator of ischaemic and excitotoxic brain damage, and that inhibitors of IL-1 action may be of therapeutic value in the treatment of acute or chronic neuronal death. diff --git a/data/corpora/cg/debug/PMID-198130.a1 b/data/corpora/cg/debug/PMID-198130.a1 new file mode 100644 index 0000000..c33ed74 --- /dev/null +++ b/data/corpora/cg/debug/PMID-198130.a1 @@ -0,0 +1,35 @@ +T1 Simple_chemical 13 73 malate - aspartate reduced nicotinamide adenine dinucleotide +T2 Cell 111 122 tumor cells +T3 Simple_chemical 155 158 CO2 +T4 Simple_chemical 163 170 lactate +T5 Simple_chemical 268 286 tricarboxylic acid +T6 Organism 320 326 rodent +T7 Cell 327 346 ascites tumor cells +T8 Cell 364 391 Ehrlich ascites tumor cells +T9 Cell 394 412 Krebs II carcinoma +T10 Cell 415 433 AS - 30D carcinoma +T11 Cell 440 451 L1210 cells +T12 Simple_chemical 513 532 D - [ 14C ] glucose +T13 Simple_chemical 612 645 nicotinamide adenine dinucleotide +T14 Simple_chemical 648 652 NADH +T15 Cell 672 677 cells +T16 Simple_chemical 737 743 malate +T17 Simple_chemical 746 755 aspartate +T18 Cell 910 920 cell lines +T19 Simple_chemical 982 1000 tricarboxylic acid +T20 Simple_chemical 1009 1017 electron +T21 Simple_chemical 1038 1044 malate +T22 Simple_chemical 1047 1056 aspartate +T23 Cancer 1074 1080 tumors +T24 Simple_chemical 1110 1114 NADH +T25 Cellular_component 1136 1149 mitochondrial +T26 Simple_chemical 1176 1180 NADH +T27 Simple_chemical 1278 1284 oxygen +T28 Cancer 1294 1300 tumors +T29 Simple_chemical 1345 1367 adenosine triphosphate +T30 Simple_chemical 1445 1467 adenosine triphosphate +T31 Simple_chemical 1484 1492 electron +T32 Cellular_component 1515 1524 cytosolic +T33 Simple_chemical 1525 1529 NADH +T34 Simple_chemical 1538 1544 malate +T35 Simple_chemical 1547 1556 aspartate diff --git a/data/corpora/cg/debug/PMID-198130.a2 b/data/corpora/cg/debug/PMID-198130.a2 new file mode 100644 index 0000000..c3ebf95 --- /dev/null +++ b/data/corpora/cg/debug/PMID-198130.a2 @@ -0,0 +1,39 @@ +* Equiv T13 T14 +T36 Pathway 74 81 shuttle +T37 Synthesis 171 181 production +T38 Glycolysis 244 263 glycolytic sequence +T39 Pathway 287 292 cycle +T40 Planned_process 454 463 incubated +T41 Negative_regulation 604 611 reduced +T42 Pathway 655 662 shuttle +T43 Positive_regulation 678 686 requires +T44 Pathway 756 763 shuttle +T45 Glycolysis 965 975 glycolysis +T46 Pathway 1001 1006 cycle +T47 Localization 1018 1027 transport +T48 Pathway 1057 1064 shuttle +T49 Pathway 1181 1188 shuttle +T50 Synthesis 1368 1377 synthesis +T51 Synthesis 1471 1480 generated +T52 
Pathway 1557 1564 shuttle +E1 Pathway:T36 Participant:T1 +E2 Synthesis:T37 Theme:T3 +E3 Synthesis:T37 Theme:T4 +E4 Glycolysis:T38 +E5 Pathway:T39 Theme:T5 +E6 Planned_process:T40 Instrument:T12 Theme:T8 +E7 Planned_process:T40 Instrument:T12 Theme:T9 +E8 Planned_process:T40 Instrument:T12 Theme:T10 +E9 Planned_process:T40 Instrument:T12 Theme:T11 +E10 Negative_regulation:T41 Theme:E11 +E11 Pathway:T42 Participant:T13 +E12 Positive_regulation:T43 Theme:E11 Cause:E13 +E13 Pathway:T44 Participant:T16 Participant2:T17 +E14 Glycolysis:T45 +E15 Pathway:T46 Participant:T19 +E16 Localization:T47 Theme:T20 +E17 Pathway:T48 Participant:T21 Participant2:T22 +E18 Pathway:T49 Participant:T26 +E19 Synthesis:T50 Theme:T29 +E20 Synthesis:T51 Theme:T30 +E21 Pathway:T52 Participant:T34 Participant2:T35 diff --git a/data/corpora/cg/debug/PMID-198130.ann b/data/corpora/cg/debug/PMID-198130.ann new file mode 100644 index 0000000..dff9359 --- /dev/null +++ b/data/corpora/cg/debug/PMID-198130.ann @@ -0,0 +1,98 @@ +* Equiv T13 T14 +T1 Simple_chemical 13 73 malate - aspartate reduced nicotinamide adenine dinucleotide +T2 Cell 111 122 tumor cells +T3 Simple_chemical 155 158 CO2 +T4 Simple_chemical 163 170 lactate +T5 Simple_chemical 268 286 tricarboxylic acid +T6 Organism 320 326 rodent +T7 Cell 327 346 ascites tumor cells +T8 Cell 364 391 Ehrlich ascites tumor cells +T9 Cell 394 412 Krebs II carcinoma +T10 Cell 415 433 AS - 30D carcinoma +T11 Cell 440 451 L1210 cells +T12 Simple_chemical 513 532 D - [ 14C ] glucose +T13 Simple_chemical 612 645 nicotinamide adenine dinucleotide +T14 Simple_chemical 648 652 NADH +T15 Cell 672 677 cells +T16 Simple_chemical 737 743 malate +T17 Simple_chemical 746 755 aspartate +T18 Cell 910 920 cell lines +T19 Simple_chemical 982 1000 tricarboxylic acid +T20 Simple_chemical 1009 1017 electron +T21 Simple_chemical 1038 1044 malate +T22 Simple_chemical 1047 1056 aspartate +T23 Cancer 1074 1080 tumors +T24 Simple_chemical 1110 1114 NADH +T25 Cellular_component 1136 1149 mitochondrial +T26 Simple_chemical 1176 1180 NADH +T27 Simple_chemical 1278 1284 oxygen +T28 Cancer 1294 1300 tumors +T29 Simple_chemical 1345 1367 adenosine triphosphate +T30 Simple_chemical 1445 1467 adenosine triphosphate +T31 Simple_chemical 1484 1492 electron +T32 Cellular_component 1515 1524 cytosolic +T33 Simple_chemical 1525 1529 NADH +T34 Simple_chemical 1538 1544 malate +T35 Simple_chemical 1547 1556 aspartate +TR36 Pathway 74 81 shuttle +TR37 Synthesis 171 181 production +TR38 Glycolysis 244 263 glycolytic sequence +TR39 Pathway 287 292 cycle +TR40 Planned_process 454 463 incubated +TR41 Negative_regulation 604 611 reduced +TR42 Pathway 655 662 shuttle +TR43 Positive_regulation 678 686 requires +TR44 Pathway 756 763 shuttle +TR45 Glycolysis 965 975 glycolysis +TR46 Pathway 1001 1006 cycle +TR47 Localization 1018 1027 transport +TR48 Pathway 1057 1064 shuttle +TR49 Pathway 1181 1188 shuttle +TR50 Synthesis 1368 1377 synthesis +TR51 Synthesis 1471 1480 generated +TR52 Pathway 1557 1564 shuttle +R1 Participant Arg1:TR48 Arg2:T21 +R2 Theme Arg1:TR37 Arg2:T3 +R3 Theme Arg1:TR50 Arg2:T29 +R4 Participant Arg1:TR44 Arg2:T17 +R5 Theme Arg1:TR51 Arg2:T30 +R6 Theme Arg1:TR40 Arg2:T8 +R7 Participant Arg1:TR42 Arg2:T13 +R8 Participant Arg1:TR48 Arg2:T22 +R9 Participant Arg1:TR52 Arg2:T35 +R10 Theme Arg1:TR40 Arg2:T10 +R11 Cause Arg1:TR43 Arg2:TR44 +R12 Participant Arg1:TR44 Arg2:T16 +R13 Theme Arg1:TR43 Arg2:TR42 +R14 Instrument Arg1:TR40 Arg2:T12 +R15 Theme Arg1:TR40 Arg2:T11 +R16 Theme Arg1:TR47 Arg2:T20 +R17 Theme 
Arg1:TR39 Arg2:T5 +R18 Theme Arg1:TR40 Arg2:T9 +R19 Participant Arg1:TR36 Arg2:T1 +R20 Theme Arg1:TR37 Arg2:T4 +R21 Participant Arg1:TR52 Arg2:T34 +R22 Participant Arg1:TR49 Arg2:T26 +R23 Participant Arg1:TR46 Arg2:T19 +R24 Theme Arg1:TR41 Arg2:TR42 +E1 Pathway:TR36 Participant:T1 +E2 Synthesis:TR37 Theme:T3 +E3 Synthesis:TR37 Theme:T4 +E4 Glycolysis:TR38 +E5 Pathway:TR39 Theme:T5 +E6 Planned_process:TR40 Instrument:T12 Theme:T8 +E7 Planned_process:TR40 Instrument:T12 Theme:T9 +E8 Planned_process:TR40 Instrument:T12 Theme:T10 +E9 Planned_process:TR40 Instrument:T12 Theme:T11 +E10 Negative_regulation:TR41 Theme:E11 +E11 Pathway:TR42 Participant:T13 +E12 Positive_regulation:TR43 Theme:E11 Cause:E13 +E13 Pathway:TR44 Participant:T16 Participant2:T17 +E14 Glycolysis:TR45 +E15 Pathway:TR46 Participant:T19 +E16 Localization:TR47 Theme:T20 +E17 Pathway:TR48 Participant:T21 Participant2:T22 +E18 Pathway:TR49 Participant:T26 +E19 Synthesis:TR50 Theme:T29 +E20 Synthesis:TR51 Theme:T30 +E21 Pathway:TR52 Participant:T34 Participant2:T35 diff --git a/data/corpora/cg/debug/PMID-198130.inv.map b/data/corpora/cg/debug/PMID-198130.inv.map new file mode 100644 index 0000000..11bfef1 --- /dev/null +++ b/data/corpora/cg/debug/PMID-198130.inv.map @@ -0,0 +1,1779 @@ +{ + "0": 0, + "1": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5, + "6": 6, + "7": 7, + "8": 8, + "9": 9, + "10": 10, + "11": 11, + "12": 12, + "13": 13, + "14": 14, + "15": 15, + "16": 16, + "17": 17, + "18": 18, + "19": 19, + "20": 19, + "21": 20, + "22": 20, + "23": 21, + "24": 22, + "25": 23, + "26": 24, + "27": 25, + "28": 26, + "29": 27, + "30": 28, + "31": 29, + "32": 30, + "33": 31, + "34": 32, + "35": 33, + "36": 34, + "37": 35, + "38": 36, + "39": 37, + "40": 38, + "41": 39, + "42": 40, + "43": 41, + "44": 42, + "45": 43, + "46": 44, + "47": 45, + "48": 46, + "49": 47, + "50": 48, + "51": 49, + "52": 50, + "53": 51, + "54": 52, + "55": 53, + "56": 54, + "57": 55, + "58": 56, + "59": 57, + "60": 58, + "61": 59, + "62": 60, + "63": 61, + "64": 62, + "65": 63, + "66": 64, + "67": 65, + "68": 66, + "69": 67, + "70": 68, + "71": 69, + "72": 70, + "73": 71, + "74": 72, + "75": 73, + "76": 74, + "77": 75, + "78": 76, + "79": 77, + "80": 78, + "81": 79, + "82": 80, + "83": 81, + "84": 82, + "85": 83, + "86": 84, + "87": 85, + "88": 86, + "89": 87, + "90": 88, + "91": 89, + "92": 90, + "93": 91, + "94": 92, + "95": 93, + "96": 94, + "97": 95, + "98": 96, + "99": 97, + "100": 98, + "101": 99, + "102": 100, + "103": 101, + "104": 102, + "105": 103, + "106": 104, + "107": 105, + "108": 106, + "109": 107, + "110": 108, + "111": 109, + "112": 110, + "113": 111, + "114": 112, + "115": 113, + "116": 114, + "117": 115, + "118": 116, + "119": 117, + "120": 118, + "121": 119, + "122": 120, + "123": 120, + "124": 121, + "125": 122, + "126": 123, + "127": 124, + "128": 125, + "129": 126, + "130": 127, + "131": 128, + "132": 129, + "133": 130, + "134": 131, + "135": 132, + "136": 133, + "137": 134, + "138": 135, + "139": 136, + "140": 137, + "141": 138, + "142": 139, + "143": 140, + "144": 141, + "145": 142, + "146": 143, + "147": 144, + "148": 145, + "149": 146, + "150": 147, + "151": 148, + "152": 149, + "153": 149, + "154": 150, + "155": 151, + "156": 152, + "157": 153, + "158": 154, + "159": 155, + "160": 156, + "161": 157, + "162": 158, + "163": 159, + "164": 160, + "165": 161, + "166": 162, + "167": 163, + "168": 164, + "169": 165, + "170": 166, + "171": 167, + "172": 168, + "173": 169, + "174": 170, + "175": 171, + "176": 172, + "177": 173, + "178": 174, + 
"179": 175, + "180": 176, + "181": 177, + "182": 177, + "183": 178, + "184": 179, + "185": 180, + "186": 181, + "187": 182, + "188": 183, + "189": 184, + "190": 185, + "191": 186, + "192": 187, + "193": 188, + "194": 189, + "195": 190, + "196": 191, + "197": 192, + "198": 193, + "199": 194, + "200": 195, + "201": 196, + "202": 197, + "203": 198, + "204": 199, + "205": 200, + "206": 201, + "207": 202, + "208": 203, + "209": 204, + "210": 205, + "211": 206, + "212": 207, + "213": 208, + "214": 209, + "215": 210, + "216": 211, + "217": 212, + "218": 213, + "219": 214, + "220": 215, + "221": 216, + "222": 217, + "223": 218, + "224": 219, + "225": 220, + "226": 221, + "227": 222, + "228": 223, + "229": 224, + "230": 225, + "231": 226, + "232": 227, + "233": 228, + "234": 229, + "235": 230, + "236": 231, + "237": 232, + "238": 233, + "239": 234, + "240": 235, + "241": 236, + "242": 237, + "243": 238, + "244": 239, + "245": 240, + "246": 241, + "247": 242, + "248": 243, + "249": 244, + "250": 245, + "251": 246, + "252": 247, + "253": 248, + "254": 249, + "255": 250, + "256": 251, + "257": 252, + "258": 253, + "259": 254, + "260": 255, + "261": 256, + "262": 257, + "263": 258, + "264": 259, + "265": 260, + "266": 261, + "267": 262, + "268": 263, + "269": 264, + "270": 265, + "271": 266, + "272": 267, + "273": 268, + "274": 269, + "275": 270, + "276": 271, + "277": 272, + "278": 273, + "279": 274, + "280": 275, + "281": 276, + "282": 277, + "283": 278, + "284": 279, + "285": 280, + "286": 281, + "287": 282, + "288": 283, + "289": 284, + "290": 285, + "291": 286, + "292": 287, + "293": 288, + "294": 289, + "295": 290, + "296": 291, + "297": 292, + "298": 293, + "299": 294, + "300": 295, + "301": 296, + "302": 297, + "303": 298, + "304": 299, + "305": 300, + "306": 301, + "307": 302, + "308": 303, + "309": 304, + "310": 305, + "311": 306, + "312": 307, + "313": 308, + "314": 309, + "315": 310, + "316": 311, + "317": 312, + "318": 313, + "319": 314, + "320": 315, + "321": 316, + "322": 317, + "323": 318, + "324": 319, + "325": 320, + "326": 321, + "327": 322, + "328": 323, + "329": 324, + "330": 325, + "331": 326, + "332": 327, + "333": 328, + "334": 329, + "335": 330, + "336": 331, + "337": 332, + "338": 333, + "339": 334, + "340": 335, + "341": 336, + "342": 337, + "343": 338, + "344": 339, + "345": 340, + "346": 341, + "347": 342, + "348": 343, + "349": 343, + "350": 344, + "351": 345, + "352": 346, + "353": 347, + "354": 348, + "355": 349, + "356": 350, + "357": 351, + "358": 352, + "359": 353, + "360": 354, + "361": 355, + "362": 356, + "363": 357, + "364": 358, + "365": 359, + "366": 360, + "367": 361, + "368": 362, + "369": 363, + "370": 364, + "371": 365, + "372": 366, + "373": 367, + "374": 368, + "375": 369, + "376": 370, + "377": 371, + "378": 372, + "379": 373, + "380": 374, + "381": 375, + "382": 376, + "383": 377, + "384": 378, + "385": 379, + "386": 380, + "387": 381, + "388": 382, + "389": 383, + "390": 384, + "391": 385, + "392": 385, + "393": 386, + "394": 387, + "395": 388, + "396": 389, + "397": 390, + "398": 391, + "399": 392, + "400": 393, + "401": 394, + "402": 395, + "403": 396, + "404": 397, + "405": 398, + "406": 399, + "407": 400, + "408": 401, + "409": 402, + "410": 403, + "411": 404, + "412": 405, + "413": 405, + "414": 406, + "415": 407, + "416": 408, + "417": 409, + "418": 409, + "419": 410, + "420": 410, + "421": 411, + "422": 412, + "423": 413, + "424": 414, + "425": 415, + "426": 416, + "427": 417, + "428": 418, + "429": 419, + "430": 420, + "431": 421, + "432": 422, 
+ "433": 423, + "434": 423, + "435": 424, + "436": 425, + "437": 426, + "438": 427, + "439": 428, + "440": 429, + "441": 430, + "442": 431, + "443": 432, + "444": 433, + "445": 434, + "446": 435, + "447": 436, + "448": 437, + "449": 438, + "450": 439, + "451": 440, + "452": 440, + "453": 441, + "454": 442, + "455": 443, + "456": 444, + "457": 445, + "458": 446, + "459": 447, + "460": 448, + "461": 449, + "462": 450, + "463": 451, + "464": 452, + "465": 453, + "466": 454, + "467": 455, + "468": 456, + "469": 457, + "470": 458, + "471": 459, + "472": 460, + "473": 461, + "474": 462, + "475": 463, + "476": 464, + "477": 465, + "478": 466, + "479": 467, + "480": 468, + "481": 469, + "482": 470, + "483": 471, + "484": 472, + "485": 473, + "486": 474, + "487": 475, + "488": 476, + "489": 477, + "490": 478, + "491": 479, + "492": 480, + "493": 481, + "494": 482, + "495": 483, + "496": 484, + "497": 485, + "498": 486, + "499": 487, + "500": 488, + "501": 489, + "502": 490, + "503": 491, + "504": 492, + "505": 493, + "506": 494, + "507": 495, + "508": 496, + "509": 497, + "510": 498, + "511": 499, + "512": 500, + "513": 501, + "514": 502, + "515": 502, + "516": 503, + "517": 503, + "518": 504, + "519": 504, + "520": 505, + "521": 506, + "522": 507, + "523": 507, + "524": 508, + "525": 508, + "526": 509, + "527": 510, + "528": 511, + "529": 512, + "530": 513, + "531": 514, + "532": 515, + "533": 515, + "534": 516, + "535": 517, + "536": 518, + "537": 519, + "538": 520, + "539": 521, + "540": 522, + "541": 523, + "542": 524, + "543": 525, + "544": 526, + "545": 527, + "546": 528, + "547": 529, + "548": 530, + "549": 531, + "550": 532, + "551": 532, + "552": 533, + "553": 534, + "554": 535, + "555": 536, + "556": 537, + "557": 538, + "558": 539, + "559": 540, + "560": 541, + "561": 542, + "562": 543, + "563": 544, + "564": 545, + "565": 546, + "566": 547, + "567": 548, + "568": 549, + "569": 550, + "570": 551, + "571": 552, + "572": 553, + "573": 554, + "574": 555, + "575": 556, + "576": 557, + "577": 558, + "578": 559, + "579": 560, + "580": 561, + "581": 562, + "582": 563, + "583": 564, + "584": 565, + "585": 566, + "586": 567, + "587": 568, + "588": 569, + "589": 570, + "590": 571, + "591": 572, + "592": 573, + "593": 574, + "594": 575, + "595": 576, + "596": 577, + "597": 578, + "598": 579, + "599": 580, + "600": 581, + "601": 582, + "602": 583, + "603": 584, + "604": 585, + "605": 586, + "606": 587, + "607": 588, + "608": 589, + "609": 590, + "610": 591, + "611": 592, + "612": 593, + "613": 594, + "614": 595, + "615": 596, + "616": 597, + "617": 598, + "618": 599, + "619": 600, + "620": 601, + "621": 602, + "622": 603, + "623": 604, + "624": 605, + "625": 606, + "626": 607, + "627": 608, + "628": 609, + "629": 610, + "630": 611, + "631": 612, + "632": 613, + "633": 614, + "634": 615, + "635": 616, + "636": 617, + "637": 618, + "638": 619, + "639": 620, + "640": 621, + "641": 622, + "642": 623, + "643": 624, + "644": 625, + "645": 626, + "646": 627, + "647": 628, + "648": 628, + "649": 629, + "650": 630, + "651": 631, + "652": 632, + "653": 632, + "654": 633, + "655": 634, + "656": 635, + "657": 636, + "658": 637, + "659": 638, + "660": 639, + "661": 640, + "662": 641, + "663": 642, + "664": 643, + "665": 644, + "666": 645, + "667": 646, + "668": 647, + "669": 648, + "670": 649, + "671": 650, + "672": 651, + "673": 652, + "674": 653, + "675": 654, + "676": 655, + "677": 656, + "678": 657, + "679": 658, + "680": 659, + "681": 660, + "682": 661, + "683": 662, + "684": 663, + "685": 664, + "686": 
665, + "687": 666, + "688": 667, + "689": 668, + "690": 669, + "691": 670, + "692": 671, + "693": 672, + "694": 673, + "695": 674, + "696": 675, + "697": 676, + "698": 677, + "699": 678, + "700": 679, + "701": 680, + "702": 681, + "703": 682, + "704": 683, + "705": 684, + "706": 685, + "707": 686, + "708": 687, + "709": 688, + "710": 689, + "711": 690, + "712": 691, + "713": 692, + "714": 693, + "715": 694, + "716": 695, + "717": 696, + "718": 697, + "719": 698, + "720": 699, + "721": 700, + "722": 701, + "723": 702, + "724": 703, + "725": 704, + "726": 705, + "727": 706, + "728": 707, + "729": 708, + "730": 709, + "731": 710, + "732": 711, + "733": 712, + "734": 713, + "735": 714, + "736": 715, + "737": 716, + "738": 717, + "739": 718, + "740": 719, + "741": 720, + "742": 721, + "743": 722, + "744": 722, + "745": 723, + "746": 723, + "747": 724, + "748": 725, + "749": 726, + "750": 727, + "751": 728, + "752": 729, + "753": 730, + "754": 731, + "755": 732, + "756": 733, + "757": 734, + "758": 735, + "759": 736, + "760": 737, + "761": 738, + "762": 739, + "763": 740, + "764": 741, + "765": 742, + "766": 742, + "767": 743, + "768": 743, + "769": 744, + "770": 744, + "771": 745, + "772": 745, + "773": 746, + "774": 746, + "775": 747, + "776": 747, + "777": 748, + "778": 749, + "779": 750, + "780": 751, + "781": 752, + "782": 753, + "783": 754, + "784": 755, + "785": 756, + "786": 757, + "787": 758, + "788": 759, + "789": 760, + "790": 761, + "791": 762, + "792": 763, + "793": 764, + "794": 765, + "795": 765, + "796": 766, + "797": 766, + "798": 767, + "799": 767, + "800": 768, + "801": 769, + "802": 770, + "803": 771, + "804": 772, + "805": 773, + "806": 774, + "807": 775, + "808": 776, + "809": 777, + "810": 778, + "811": 778, + "812": 779, + "813": 780, + "814": 781, + "815": 782, + "816": 783, + "817": 784, + "818": 785, + "819": 786, + "820": 787, + "821": 788, + "822": 789, + "823": 790, + "824": 790, + "825": 791, + "826": 791, + "827": 792, + "828": 793, + "829": 794, + "830": 795, + "831": 795, + "832": 796, + "833": 797, + "834": 798, + "835": 799, + "836": 800, + "837": 801, + "838": 801, + "839": 802, + "840": 802, + "841": 803, + "842": 804, + "843": 805, + "844": 806, + "845": 806, + "846": 807, + "847": 808, + "848": 809, + "849": 810, + "850": 811, + "851": 812, + "852": 812, + "853": 813, + "854": 813, + "855": 814, + "856": 815, + "857": 816, + "858": 817, + "859": 818, + "860": 819, + "861": 820, + "862": 821, + "863": 822, + "864": 823, + "865": 824, + "866": 825, + "867": 826, + "868": 827, + "869": 828, + "870": 829, + "871": 830, + "872": 831, + "873": 832, + "874": 833, + "875": 834, + "876": 835, + "877": 836, + "878": 837, + "879": 838, + "880": 839, + "881": 840, + "882": 841, + "883": 842, + "884": 843, + "885": 844, + "886": 845, + "887": 846, + "888": 847, + "889": 848, + "890": 849, + "891": 850, + "892": 851, + "893": 852, + "894": 853, + "895": 854, + "896": 855, + "897": 856, + "898": 857, + "899": 858, + "900": 859, + "901": 860, + "902": 861, + "903": 862, + "904": 863, + "905": 864, + "906": 865, + "907": 866, + "908": 867, + "909": 868, + "910": 869, + "911": 870, + "912": 871, + "913": 872, + "914": 873, + "915": 874, + "916": 875, + "917": 876, + "918": 877, + "919": 878, + "920": 879, + "921": 879, + "922": 880, + "923": 881, + "924": 882, + "925": 883, + "926": 884, + "927": 885, + "928": 886, + "929": 887, + "930": 888, + "931": 889, + "932": 890, + "933": 891, + "934": 892, + "935": 893, + "936": 894, + "937": 895, + "938": 896, + "939": 897, + 
"940": 898, + "941": 899, + "942": 900, + "943": 901, + "944": 902, + "945": 903, + "946": 904, + "947": 905, + "948": 906, + "949": 907, + "950": 908, + "951": 909, + "952": 910, + "953": 911, + "954": 912, + "955": 913, + "956": 914, + "957": 915, + "958": 916, + "959": 917, + "960": 918, + "961": 919, + "962": 920, + "963": 921, + "964": 922, + "965": 923, + "966": 924, + "967": 925, + "968": 926, + "969": 927, + "970": 928, + "971": 929, + "972": 930, + "973": 931, + "974": 932, + "975": 933, + "976": 933, + "977": 934, + "978": 935, + "979": 936, + "980": 937, + "981": 938, + "982": 939, + "983": 940, + "984": 941, + "985": 942, + "986": 943, + "987": 944, + "988": 945, + "989": 946, + "990": 947, + "991": 948, + "992": 949, + "993": 950, + "994": 951, + "995": 952, + "996": 953, + "997": 954, + "998": 955, + "999": 956, + "1000": 957, + "1001": 958, + "1002": 959, + "1003": 960, + "1004": 961, + "1005": 962, + "1006": 963, + "1007": 963, + "1008": 964, + "1009": 965, + "1010": 966, + "1011": 967, + "1012": 968, + "1013": 969, + "1014": 970, + "1015": 971, + "1016": 972, + "1017": 973, + "1018": 974, + "1019": 975, + "1020": 976, + "1021": 977, + "1022": 978, + "1023": 979, + "1024": 980, + "1025": 981, + "1026": 982, + "1027": 983, + "1028": 983, + "1029": 984, + "1030": 985, + "1031": 986, + "1032": 987, + "1033": 988, + "1034": 989, + "1035": 990, + "1036": 991, + "1037": 992, + "1038": 993, + "1039": 994, + "1040": 995, + "1041": 996, + "1042": 997, + "1043": 998, + "1044": 999, + "1045": 999, + "1046": 1000, + "1047": 1000, + "1048": 1001, + "1049": 1002, + "1050": 1003, + "1051": 1004, + "1052": 1005, + "1053": 1006, + "1054": 1007, + "1055": 1008, + "1056": 1009, + "1057": 1010, + "1058": 1011, + "1059": 1012, + "1060": 1013, + "1061": 1014, + "1062": 1015, + "1063": 1016, + "1064": 1017, + "1065": 1018, + "1066": 1019, + "1067": 1020, + "1068": 1021, + "1069": 1022, + "1070": 1023, + "1071": 1024, + "1072": 1025, + "1073": 1026, + "1074": 1027, + "1075": 1028, + "1076": 1029, + "1077": 1030, + "1078": 1031, + "1079": 1032, + "1080": 1033, + "1081": 1033, + "1082": 1034, + "1083": 1035, + "1084": 1036, + "1085": 1037, + "1086": 1038, + "1087": 1039, + "1088": 1040, + "1089": 1041, + "1090": 1042, + "1091": 1043, + "1092": 1044, + "1093": 1045, + "1094": 1046, + "1095": 1047, + "1096": 1048, + "1097": 1049, + "1098": 1050, + "1099": 1051, + "1100": 1052, + "1101": 1053, + "1102": 1054, + "1103": 1055, + "1104": 1056, + "1105": 1057, + "1106": 1058, + "1107": 1059, + "1108": 1060, + "1109": 1061, + "1110": 1062, + "1111": 1063, + "1112": 1064, + "1113": 1065, + "1114": 1066, + "1115": 1067, + "1116": 1068, + "1117": 1069, + "1118": 1070, + "1119": 1071, + "1120": 1072, + "1121": 1073, + "1122": 1074, + "1123": 1075, + "1124": 1076, + "1125": 1077, + "1126": 1078, + "1127": 1079, + "1128": 1080, + "1129": 1081, + "1130": 1082, + "1131": 1083, + "1132": 1084, + "1133": 1085, + "1134": 1086, + "1135": 1087, + "1136": 1088, + "1137": 1089, + "1138": 1090, + "1139": 1091, + "1140": 1092, + "1141": 1093, + "1142": 1094, + "1143": 1095, + "1144": 1096, + "1145": 1097, + "1146": 1098, + "1147": 1099, + "1148": 1100, + "1149": 1101, + "1150": 1102, + "1151": 1103, + "1152": 1104, + "1153": 1105, + "1154": 1106, + "1155": 1107, + "1156": 1108, + "1157": 1109, + "1158": 1110, + "1159": 1111, + "1160": 1112, + "1161": 1113, + "1162": 1114, + "1163": 1115, + "1164": 1116, + "1165": 1117, + "1166": 1118, + "1167": 1119, + "1168": 1120, + "1169": 1121, + "1170": 1122, + "1171": 1123, + "1172": 
1124, + "1173": 1125, + "1174": 1126, + "1175": 1127, + "1176": 1128, + "1177": 1129, + "1178": 1130, + "1179": 1131, + "1180": 1132, + "1181": 1133, + "1182": 1134, + "1183": 1135, + "1184": 1136, + "1185": 1137, + "1186": 1138, + "1187": 1139, + "1188": 1140, + "1189": 1140, + "1190": 1141, + "1191": 1142, + "1192": 1143, + "1193": 1144, + "1194": 1145, + "1195": 1146, + "1196": 1147, + "1197": 1148, + "1198": 1149, + "1199": 1150, + "1200": 1151, + "1201": 1152, + "1202": 1153, + "1203": 1154, + "1204": 1155, + "1205": 1156, + "1206": 1157, + "1207": 1158, + "1208": 1159, + "1209": 1160, + "1210": 1161, + "1211": 1162, + "1212": 1163, + "1213": 1164, + "1214": 1165, + "1215": 1166, + "1216": 1167, + "1217": 1168, + "1218": 1169, + "1219": 1170, + "1220": 1171, + "1221": 1172, + "1222": 1173, + "1223": 1174, + "1224": 1175, + "1225": 1176, + "1226": 1177, + "1227": 1178, + "1228": 1179, + "1229": 1180, + "1230": 1181, + "1231": 1181, + "1232": 1182, + "1233": 1183, + "1234": 1184, + "1235": 1185, + "1236": 1186, + "1237": 1187, + "1238": 1188, + "1239": 1189, + "1240": 1190, + "1241": 1191, + "1242": 1192, + "1243": 1193, + "1244": 1194, + "1245": 1195, + "1246": 1196, + "1247": 1197, + "1248": 1198, + "1249": 1199, + "1250": 1200, + "1251": 1201, + "1252": 1202, + "1253": 1203, + "1254": 1204, + "1255": 1205, + "1256": 1206, + "1257": 1207, + "1258": 1208, + "1259": 1209, + "1260": 1210, + "1261": 1211, + "1262": 1212, + "1263": 1213, + "1264": 1214, + "1265": 1215, + "1266": 1216, + "1267": 1217, + "1268": 1218, + "1269": 1219, + "1270": 1220, + "1271": 1221, + "1272": 1222, + "1273": 1223, + "1274": 1224, + "1275": 1225, + "1276": 1226, + "1277": 1227, + "1278": 1228, + "1279": 1229, + "1280": 1230, + "1281": 1231, + "1282": 1232, + "1283": 1233, + "1284": 1234, + "1285": 1235, + "1286": 1236, + "1287": 1237, + "1288": 1238, + "1289": 1239, + "1290": 1240, + "1291": 1241, + "1292": 1242, + "1293": 1243, + "1294": 1244, + "1295": 1245, + "1296": 1246, + "1297": 1247, + "1298": 1248, + "1299": 1249, + "1300": 1250, + "1301": 1250, + "1302": 1251, + "1303": 1252, + "1304": 1253, + "1305": 1254, + "1306": 1255, + "1307": 1256, + "1308": 1257, + "1309": 1258, + "1310": 1259, + "1311": 1260, + "1312": 1261, + "1313": 1262, + "1314": 1263, + "1315": 1264, + "1316": 1265, + "1317": 1266, + "1318": 1267, + "1319": 1268, + "1320": 1269, + "1321": 1270, + "1322": 1271, + "1323": 1272, + "1324": 1273, + "1325": 1274, + "1326": 1275, + "1327": 1276, + "1328": 1277, + "1329": 1278, + "1330": 1279, + "1331": 1280, + "1332": 1281, + "1333": 1282, + "1334": 1283, + "1335": 1284, + "1336": 1285, + "1337": 1286, + "1338": 1287, + "1339": 1288, + "1340": 1289, + "1341": 1290, + "1342": 1291, + "1343": 1292, + "1344": 1293, + "1345": 1294, + "1346": 1295, + "1347": 1296, + "1348": 1297, + "1349": 1298, + "1350": 1299, + "1351": 1300, + "1352": 1301, + "1353": 1302, + "1354": 1303, + "1355": 1304, + "1356": 1305, + "1357": 1306, + "1358": 1307, + "1359": 1308, + "1360": 1309, + "1361": 1310, + "1362": 1311, + "1363": 1312, + "1364": 1313, + "1365": 1314, + "1366": 1315, + "1367": 1316, + "1368": 1317, + "1369": 1318, + "1370": 1319, + "1371": 1320, + "1372": 1321, + "1373": 1322, + "1374": 1323, + "1375": 1324, + "1376": 1325, + "1377": 1326, + "1378": 1327, + "1379": 1328, + "1380": 1329, + "1381": 1330, + "1382": 1331, + "1383": 1332, + "1384": 1333, + "1385": 1334, + "1386": 1335, + "1387": 1336, + "1388": 1337, + "1389": 1338, + "1390": 1339, + "1391": 1340, + "1392": 1341, + "1393": 1342, + "1394": 
1343, + "1395": 1344, + "1396": 1345, + "1397": 1346, + "1398": 1347, + "1399": 1348, + "1400": 1349, + "1401": 1350, + "1402": 1351, + "1403": 1352, + "1404": 1353, + "1405": 1354, + "1406": 1355, + "1407": 1356, + "1408": 1357, + "1409": 1358, + "1410": 1359, + "1411": 1360, + "1412": 1361, + "1413": 1362, + "1414": 1363, + "1415": 1364, + "1416": 1365, + "1417": 1366, + "1418": 1366, + "1419": 1367, + "1420": 1367, + "1421": 1368, + "1422": 1369, + "1423": 1370, + "1424": 1371, + "1425": 1372, + "1426": 1373, + "1427": 1374, + "1428": 1375, + "1429": 1376, + "1430": 1377, + "1431": 1378, + "1432": 1379, + "1433": 1380, + "1434": 1381, + "1435": 1382, + "1436": 1383, + "1437": 1384, + "1438": 1385, + "1439": 1386, + "1440": 1387, + "1441": 1388, + "1442": 1389, + "1443": 1390, + "1444": 1391, + "1445": 1392, + "1446": 1393, + "1447": 1394, + "1448": 1395, + "1449": 1396, + "1450": 1397, + "1451": 1398, + "1452": 1399, + "1453": 1400, + "1454": 1401, + "1455": 1402, + "1456": 1403, + "1457": 1404, + "1458": 1405, + "1459": 1406, + "1460": 1407, + "1461": 1408, + "1462": 1409, + "1463": 1410, + "1464": 1411, + "1465": 1412, + "1466": 1413, + "1467": 1414, + "1468": 1415, + "1469": 1416, + "1470": 1417, + "1471": 1418, + "1472": 1419, + "1473": 1420, + "1474": 1421, + "1475": 1422, + "1476": 1423, + "1477": 1424, + "1478": 1425, + "1479": 1426, + "1480": 1427, + "1481": 1428, + "1482": 1429, + "1483": 1430, + "1484": 1431, + "1485": 1432, + "1486": 1433, + "1487": 1434, + "1488": 1435, + "1489": 1436, + "1490": 1437, + "1491": 1438, + "1492": 1439, + "1493": 1440, + "1494": 1441, + "1495": 1442, + "1496": 1443, + "1497": 1444, + "1498": 1445, + "1499": 1446, + "1500": 1447, + "1501": 1448, + "1502": 1449, + "1503": 1450, + "1504": 1451, + "1505": 1452, + "1506": 1453, + "1507": 1454, + "1508": 1455, + "1509": 1456, + "1510": 1457, + "1511": 1458, + "1512": 1459, + "1513": 1460, + "1514": 1461, + "1515": 1462, + "1516": 1463, + "1517": 1464, + "1518": 1465, + "1519": 1466, + "1520": 1467, + "1521": 1468, + "1522": 1469, + "1523": 1470, + "1524": 1471, + "1525": 1472, + "1526": 1473, + "1527": 1474, + "1528": 1475, + "1529": 1476, + "1530": 1477, + "1531": 1478, + "1532": 1479, + "1533": 1480, + "1534": 1481, + "1535": 1482, + "1536": 1483, + "1537": 1484, + "1538": 1485, + "1539": 1486, + "1540": 1487, + "1541": 1488, + "1542": 1489, + "1543": 1490, + "1544": 1491, + "1545": 1491, + "1546": 1492, + "1547": 1492, + "1548": 1493, + "1549": 1494, + "1550": 1495, + "1551": 1496, + "1552": 1497, + "1553": 1498, + "1554": 1499, + "1555": 1500, + "1556": 1501, + "1557": 1502, + "1558": 1503, + "1559": 1504, + "1560": 1505, + "1561": 1506, + "1562": 1507, + "1563": 1508, + "1564": 1509, + "1565": 1509, + "1566": 1510, + "entities": { + "T1": [ + 13, + 71 + ], + "T2": [ + 109, + 120 + ], + "T3": [ + 151, + 154 + ], + "T4": [ + 159, + 166 + ], + "T5": [ + 263, + 281 + ], + "T6": [ + 315, + 321 + ], + "T7": [ + 322, + 341 + ], + "T8": [ + 358, + 385 + ], + "T9": [ + 387, + 405 + ], + "T10": [ + 407, + 423 + ], + "T11": [ + 429, + 440 + ], + "T12": [ + 501, + 515 + ], + "T13": [ + 593, + 626 + ], + "T14": [ + 628, + 632 + ], + "T15": [ + 651, + 656 + ], + "T16": [ + 716, + 722 + ], + "T17": [ + 723, + 732 + ], + "T18": [ + 869, + 879 + ], + "T19": [ + 939, + 957 + ], + "T20": [ + 965, + 973 + ], + "T21": [ + 993, + 999 + ], + "T22": [ + 1000, + 1009 + ], + "T23": [ + 1027, + 1033 + ], + "T24": [ + 1062, + 1066 + ], + "T25": [ + 1088, + 1101 + ], + "T26": [ + 1128, + 1132 + ], + "T27": [ + 1228, + 1234 + 
], + "T28": [ + 1244, + 1250 + ], + "T29": [ + 1294, + 1316 + ], + "T30": [ + 1392, + 1414 + ], + "T31": [ + 1431, + 1439 + ], + "T32": [ + 1462, + 1471 + ], + "T33": [ + 1472, + 1476 + ], + "T34": [ + 1485, + 1491 + ], + "T35": [ + 1492, + 1501 + ], + "T36": [ + 72, + 79 + ], + "T37": [ + 167, + 177 + ], + "T38": [ + 239, + 258 + ], + "T39": [ + 282, + 287 + ], + "T40": [ + 442, + 451 + ], + "T41": [ + 585, + 592 + ], + "T42": [ + 634, + 641 + ], + "T43": [ + 657, + 665 + ], + "T44": [ + 733, + 740 + ], + "T45": [ + 923, + 933 + ], + "T46": [ + 958, + 963 + ], + "T47": [ + 974, + 983 + ], + "T48": [ + 1010, + 1017 + ], + "T49": [ + 1133, + 1140 + ], + "T50": [ + 1317, + 1326 + ], + "T51": [ + 1418, + 1427 + ], + "T52": [ + 1502, + 1509 + ] + } +} \ No newline at end of file diff --git a/data/corpora/cg/debug/PMID-198130.map b/data/corpora/cg/debug/PMID-198130.map new file mode 100644 index 0000000..ef839d0 --- /dev/null +++ b/data/corpora/cg/debug/PMID-198130.map @@ -0,0 +1,1513 @@ +{ + "0": 0, + "1": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5, + "6": 6, + "7": 7, + "8": 8, + "9": 9, + "10": 10, + "11": 11, + "12": 12, + "13": 13, + "14": 14, + "15": 15, + "16": 16, + "17": 17, + "18": 18, + "19": 20, + "20": 22, + "21": 23, + "22": 24, + "23": 25, + "24": 26, + "25": 27, + "26": 28, + "27": 29, + "28": 30, + "29": 31, + "30": 32, + "31": 33, + "32": 34, + "33": 35, + "34": 36, + "35": 37, + "36": 38, + "37": 39, + "38": 40, + "39": 41, + "40": 42, + "41": 43, + "42": 44, + "43": 45, + "44": 46, + "45": 47, + "46": 48, + "47": 49, + "48": 50, + "49": 51, + "50": 52, + "51": 53, + "52": 54, + "53": 55, + "54": 56, + "55": 57, + "56": 58, + "57": 59, + "58": 60, + "59": 61, + "60": 62, + "61": 63, + "62": 64, + "63": 65, + "64": 66, + "65": 67, + "66": 68, + "67": 69, + "68": 70, + "69": 71, + "70": 72, + "71": 73, + "72": 74, + "73": 75, + "74": 76, + "75": 77, + "76": 78, + "77": 79, + "78": 80, + "79": 81, + "80": 82, + "81": 83, + "82": 84, + "83": 85, + "84": 86, + "85": 87, + "86": 88, + "87": 89, + "88": 90, + "89": 91, + "90": 92, + "91": 93, + "92": 94, + "93": 95, + "94": 96, + "95": 97, + "96": 98, + "97": 99, + "98": 100, + "99": 101, + "100": 102, + "101": 103, + "102": 104, + "103": 105, + "104": 106, + "105": 107, + "106": 108, + "107": 109, + "108": 110, + "109": 111, + "110": 112, + "111": 113, + "112": 114, + "113": 115, + "114": 116, + "115": 117, + "116": 118, + "117": 119, + "118": 120, + "119": 121, + "120": 123, + "121": 124, + "122": 125, + "123": 126, + "124": 127, + "125": 128, + "126": 129, + "127": 130, + "128": 131, + "129": 132, + "130": 133, + "131": 134, + "132": 135, + "133": 136, + "134": 137, + "135": 138, + "136": 139, + "137": 140, + "138": 141, + "139": 142, + "140": 143, + "141": 144, + "142": 145, + "143": 146, + "144": 147, + "145": 148, + "146": 149, + "147": 150, + "148": 151, + "149": 153, + "150": 154, + "151": 155, + "152": 156, + "153": 157, + "154": 158, + "155": 159, + "156": 160, + "157": 161, + "158": 162, + "159": 163, + "160": 164, + "161": 165, + "162": 166, + "163": 167, + "164": 168, + "165": 169, + "166": 170, + "167": 171, + "168": 172, + "169": 173, + "170": 174, + "171": 175, + "172": 176, + "173": 177, + "174": 178, + "175": 179, + "176": 180, + "177": 182, + "178": 183, + "179": 184, + "180": 185, + "181": 186, + "182": 187, + "183": 188, + "184": 189, + "185": 190, + "186": 191, + "187": 192, + "188": 193, + "189": 194, + "190": 195, + "191": 196, + "192": 197, + "193": 198, + "194": 199, + "195": 200, + "196": 201, + "197": 202, 
+ "198": 203, + "199": 204, + "200": 205, + "201": 206, + "202": 207, + "203": 208, + "204": 209, + "205": 210, + "206": 211, + "207": 212, + "208": 213, + "209": 214, + "210": 215, + "211": 216, + "212": 217, + "213": 218, + "214": 219, + "215": 220, + "216": 221, + "217": 222, + "218": 223, + "219": 224, + "220": 225, + "221": 226, + "222": 227, + "223": 228, + "224": 229, + "225": 230, + "226": 231, + "227": 232, + "228": 233, + "229": 234, + "230": 235, + "231": 236, + "232": 237, + "233": 238, + "234": 239, + "235": 240, + "236": 241, + "237": 242, + "238": 243, + "239": 244, + "240": 245, + "241": 246, + "242": 247, + "243": 248, + "244": 249, + "245": 250, + "246": 251, + "247": 252, + "248": 253, + "249": 254, + "250": 255, + "251": 256, + "252": 257, + "253": 258, + "254": 259, + "255": 260, + "256": 261, + "257": 262, + "258": 263, + "259": 264, + "260": 265, + "261": 266, + "262": 267, + "263": 268, + "264": 269, + "265": 270, + "266": 271, + "267": 272, + "268": 273, + "269": 274, + "270": 275, + "271": 276, + "272": 277, + "273": 278, + "274": 279, + "275": 280, + "276": 281, + "277": 282, + "278": 283, + "279": 284, + "280": 285, + "281": 286, + "282": 287, + "283": 288, + "284": 289, + "285": 290, + "286": 291, + "287": 292, + "288": 293, + "289": 294, + "290": 295, + "291": 296, + "292": 297, + "293": 298, + "294": 299, + "295": 300, + "296": 301, + "297": 302, + "298": 303, + "299": 304, + "300": 305, + "301": 306, + "302": 307, + "303": 308, + "304": 309, + "305": 310, + "306": 311, + "307": 312, + "308": 313, + "309": 314, + "310": 315, + "311": 316, + "312": 317, + "313": 318, + "314": 319, + "315": 320, + "316": 321, + "317": 322, + "318": 323, + "319": 324, + "320": 325, + "321": 326, + "322": 327, + "323": 328, + "324": 329, + "325": 330, + "326": 331, + "327": 332, + "328": 333, + "329": 334, + "330": 335, + "331": 336, + "332": 337, + "333": 338, + "334": 339, + "335": 340, + "336": 341, + "337": 342, + "338": 343, + "339": 344, + "340": 345, + "341": 346, + "342": 347, + "343": 349, + "344": 350, + "345": 351, + "346": 352, + "347": 353, + "348": 354, + "349": 355, + "350": 356, + "351": 357, + "352": 358, + "353": 359, + "354": 360, + "355": 361, + "356": 362, + "357": 363, + "358": 364, + "359": 365, + "360": 366, + "361": 367, + "362": 368, + "363": 369, + "364": 370, + "365": 371, + "366": 372, + "367": 373, + "368": 374, + "369": 375, + "370": 376, + "371": 377, + "372": 378, + "373": 379, + "374": 380, + "375": 381, + "376": 382, + "377": 383, + "378": 384, + "379": 385, + "380": 386, + "381": 387, + "382": 388, + "383": 389, + "384": 390, + "385": 392, + "386": 393, + "387": 394, + "388": 395, + "389": 396, + "390": 397, + "391": 398, + "392": 399, + "393": 400, + "394": 401, + "395": 402, + "396": 403, + "397": 404, + "398": 405, + "399": 406, + "400": 407, + "401": 408, + "402": 409, + "403": 410, + "404": 411, + "405": 413, + "406": 414, + "407": 415, + "408": 416, + "409": 418, + "410": 420, + "411": 421, + "412": 422, + "413": 423, + "414": 424, + "415": 425, + "416": 426, + "417": 427, + "418": 428, + "419": 429, + "420": 430, + "421": 431, + "422": 432, + "423": 434, + "424": 435, + "425": 436, + "426": 437, + "427": 438, + "428": 439, + "429": 440, + "430": 441, + "431": 442, + "432": 443, + "433": 444, + "434": 445, + "435": 446, + "436": 447, + "437": 448, + "438": 449, + "439": 450, + "440": 452, + "441": 453, + "442": 454, + "443": 455, + "444": 456, + "445": 457, + "446": 458, + "447": 459, + "448": 460, + "449": 461, + "450": 462, + "451": 
463, + "452": 464, + "453": 465, + "454": 466, + "455": 467, + "456": 468, + "457": 469, + "458": 470, + "459": 471, + "460": 472, + "461": 473, + "462": 474, + "463": 475, + "464": 476, + "465": 477, + "466": 478, + "467": 479, + "468": 480, + "469": 481, + "470": 482, + "471": 483, + "472": 484, + "473": 485, + "474": 486, + "475": 487, + "476": 488, + "477": 489, + "478": 490, + "479": 491, + "480": 492, + "481": 493, + "482": 494, + "483": 495, + "484": 496, + "485": 497, + "486": 498, + "487": 499, + "488": 500, + "489": 501, + "490": 502, + "491": 503, + "492": 504, + "493": 505, + "494": 506, + "495": 507, + "496": 508, + "497": 509, + "498": 510, + "499": 511, + "500": 512, + "501": 513, + "502": 515, + "503": 517, + "504": 519, + "505": 520, + "506": 521, + "507": 523, + "508": 525, + "509": 526, + "510": 527, + "511": 528, + "512": 529, + "513": 530, + "514": 531, + "515": 533, + "516": 534, + "517": 535, + "518": 536, + "519": 537, + "520": 538, + "521": 539, + "522": 540, + "523": 541, + "524": 542, + "525": 543, + "526": 544, + "527": 545, + "528": 546, + "529": 547, + "530": 548, + "531": 549, + "532": 551, + "533": 552, + "534": 553, + "535": 554, + "536": 555, + "537": 556, + "538": 557, + "539": 558, + "540": 559, + "541": 560, + "542": 561, + "543": 562, + "544": 563, + "545": 564, + "546": 565, + "547": 566, + "548": 567, + "549": 568, + "550": 569, + "551": 570, + "552": 571, + "553": 572, + "554": 573, + "555": 574, + "556": 575, + "557": 576, + "558": 577, + "559": 578, + "560": 579, + "561": 580, + "562": 581, + "563": 582, + "564": 583, + "565": 584, + "566": 585, + "567": 586, + "568": 587, + "569": 588, + "570": 589, + "571": 590, + "572": 591, + "573": 592, + "574": 593, + "575": 594, + "576": 595, + "577": 596, + "578": 597, + "579": 598, + "580": 599, + "581": 600, + "582": 601, + "583": 602, + "584": 603, + "585": 604, + "586": 605, + "587": 606, + "588": 607, + "589": 608, + "590": 609, + "591": 610, + "592": 611, + "593": 612, + "594": 613, + "595": 614, + "596": 615, + "597": 616, + "598": 617, + "599": 618, + "600": 619, + "601": 620, + "602": 621, + "603": 622, + "604": 623, + "605": 624, + "606": 625, + "607": 626, + "608": 627, + "609": 628, + "610": 629, + "611": 630, + "612": 631, + "613": 632, + "614": 633, + "615": 634, + "616": 635, + "617": 636, + "618": 637, + "619": 638, + "620": 639, + "621": 640, + "622": 641, + "623": 642, + "624": 643, + "625": 644, + "626": 645, + "627": 646, + "628": 648, + "629": 649, + "630": 650, + "631": 651, + "632": 653, + "633": 654, + "634": 655, + "635": 656, + "636": 657, + "637": 658, + "638": 659, + "639": 660, + "640": 661, + "641": 662, + "642": 663, + "643": 664, + "644": 665, + "645": 666, + "646": 667, + "647": 668, + "648": 669, + "649": 670, + "650": 671, + "651": 672, + "652": 673, + "653": 674, + "654": 675, + "655": 676, + "656": 677, + "657": 678, + "658": 679, + "659": 680, + "660": 681, + "661": 682, + "662": 683, + "663": 684, + "664": 685, + "665": 686, + "666": 687, + "667": 688, + "668": 689, + "669": 690, + "670": 691, + "671": 692, + "672": 693, + "673": 694, + "674": 695, + "675": 696, + "676": 697, + "677": 698, + "678": 699, + "679": 700, + "680": 701, + "681": 702, + "682": 703, + "683": 704, + "684": 705, + "685": 706, + "686": 707, + "687": 708, + "688": 709, + "689": 710, + "690": 711, + "691": 712, + "692": 713, + "693": 714, + "694": 715, + "695": 716, + "696": 717, + "697": 718, + "698": 719, + "699": 720, + "700": 721, + "701": 722, + "702": 723, + "703": 724, + "704": 725, + 
"705": 726, + "706": 727, + "707": 728, + "708": 729, + "709": 730, + "710": 731, + "711": 732, + "712": 733, + "713": 734, + "714": 735, + "715": 736, + "716": 737, + "717": 738, + "718": 739, + "719": 740, + "720": 741, + "721": 742, + "722": 744, + "723": 746, + "724": 747, + "725": 748, + "726": 749, + "727": 750, + "728": 751, + "729": 752, + "730": 753, + "731": 754, + "732": 755, + "733": 756, + "734": 757, + "735": 758, + "736": 759, + "737": 760, + "738": 761, + "739": 762, + "740": 763, + "741": 764, + "742": 766, + "743": 768, + "744": 770, + "745": 772, + "746": 774, + "747": 776, + "748": 777, + "749": 778, + "750": 779, + "751": 780, + "752": 781, + "753": 782, + "754": 783, + "755": 784, + "756": 785, + "757": 786, + "758": 787, + "759": 788, + "760": 789, + "761": 790, + "762": 791, + "763": 792, + "764": 793, + "765": 795, + "766": 797, + "767": 799, + "768": 800, + "769": 801, + "770": 802, + "771": 803, + "772": 804, + "773": 805, + "774": 806, + "775": 807, + "776": 808, + "777": 809, + "778": 811, + "779": 812, + "780": 813, + "781": 814, + "782": 815, + "783": 816, + "784": 817, + "785": 818, + "786": 819, + "787": 820, + "788": 821, + "789": 822, + "790": 824, + "791": 826, + "792": 827, + "793": 828, + "794": 829, + "795": 831, + "796": 832, + "797": 833, + "798": 834, + "799": 835, + "800": 836, + "801": 838, + "802": 840, + "803": 841, + "804": 842, + "805": 843, + "806": 845, + "807": 846, + "808": 847, + "809": 848, + "810": 849, + "811": 850, + "812": 852, + "813": 854, + "814": 855, + "815": 856, + "816": 857, + "817": 858, + "818": 859, + "819": 860, + "820": 861, + "821": 862, + "822": 863, + "823": 864, + "824": 865, + "825": 866, + "826": 867, + "827": 868, + "828": 869, + "829": 870, + "830": 871, + "831": 872, + "832": 873, + "833": 874, + "834": 875, + "835": 876, + "836": 877, + "837": 878, + "838": 879, + "839": 880, + "840": 881, + "841": 882, + "842": 883, + "843": 884, + "844": 885, + "845": 886, + "846": 887, + "847": 888, + "848": 889, + "849": 890, + "850": 891, + "851": 892, + "852": 893, + "853": 894, + "854": 895, + "855": 896, + "856": 897, + "857": 898, + "858": 899, + "859": 900, + "860": 901, + "861": 902, + "862": 903, + "863": 904, + "864": 905, + "865": 906, + "866": 907, + "867": 908, + "868": 909, + "869": 910, + "870": 911, + "871": 912, + "872": 913, + "873": 914, + "874": 915, + "875": 916, + "876": 917, + "877": 918, + "878": 919, + "879": 921, + "880": 922, + "881": 923, + "882": 924, + "883": 925, + "884": 926, + "885": 927, + "886": 928, + "887": 929, + "888": 930, + "889": 931, + "890": 932, + "891": 933, + "892": 934, + "893": 935, + "894": 936, + "895": 937, + "896": 938, + "897": 939, + "898": 940, + "899": 941, + "900": 942, + "901": 943, + "902": 944, + "903": 945, + "904": 946, + "905": 947, + "906": 948, + "907": 949, + "908": 950, + "909": 951, + "910": 952, + "911": 953, + "912": 954, + "913": 955, + "914": 956, + "915": 957, + "916": 958, + "917": 959, + "918": 960, + "919": 961, + "920": 962, + "921": 963, + "922": 964, + "923": 965, + "924": 966, + "925": 967, + "926": 968, + "927": 969, + "928": 970, + "929": 971, + "930": 972, + "931": 973, + "932": 974, + "933": 976, + "934": 977, + "935": 978, + "936": 979, + "937": 980, + "938": 981, + "939": 982, + "940": 983, + "941": 984, + "942": 985, + "943": 986, + "944": 987, + "945": 988, + "946": 989, + "947": 990, + "948": 991, + "949": 992, + "950": 993, + "951": 994, + "952": 995, + "953": 996, + "954": 997, + "955": 998, + "956": 999, + "957": 1000, + "958": 
1001, + "959": 1002, + "960": 1003, + "961": 1004, + "962": 1005, + "963": 1007, + "964": 1008, + "965": 1009, + "966": 1010, + "967": 1011, + "968": 1012, + "969": 1013, + "970": 1014, + "971": 1015, + "972": 1016, + "973": 1017, + "974": 1018, + "975": 1019, + "976": 1020, + "977": 1021, + "978": 1022, + "979": 1023, + "980": 1024, + "981": 1025, + "982": 1026, + "983": 1028, + "984": 1029, + "985": 1030, + "986": 1031, + "987": 1032, + "988": 1033, + "989": 1034, + "990": 1035, + "991": 1036, + "992": 1037, + "993": 1038, + "994": 1039, + "995": 1040, + "996": 1041, + "997": 1042, + "998": 1043, + "999": 1045, + "1000": 1047, + "1001": 1048, + "1002": 1049, + "1003": 1050, + "1004": 1051, + "1005": 1052, + "1006": 1053, + "1007": 1054, + "1008": 1055, + "1009": 1056, + "1010": 1057, + "1011": 1058, + "1012": 1059, + "1013": 1060, + "1014": 1061, + "1015": 1062, + "1016": 1063, + "1017": 1064, + "1018": 1065, + "1019": 1066, + "1020": 1067, + "1021": 1068, + "1022": 1069, + "1023": 1070, + "1024": 1071, + "1025": 1072, + "1026": 1073, + "1027": 1074, + "1028": 1075, + "1029": 1076, + "1030": 1077, + "1031": 1078, + "1032": 1079, + "1033": 1081, + "1034": 1082, + "1035": 1083, + "1036": 1084, + "1037": 1085, + "1038": 1086, + "1039": 1087, + "1040": 1088, + "1041": 1089, + "1042": 1090, + "1043": 1091, + "1044": 1092, + "1045": 1093, + "1046": 1094, + "1047": 1095, + "1048": 1096, + "1049": 1097, + "1050": 1098, + "1051": 1099, + "1052": 1100, + "1053": 1101, + "1054": 1102, + "1055": 1103, + "1056": 1104, + "1057": 1105, + "1058": 1106, + "1059": 1107, + "1060": 1108, + "1061": 1109, + "1062": 1110, + "1063": 1111, + "1064": 1112, + "1065": 1113, + "1066": 1114, + "1067": 1115, + "1068": 1116, + "1069": 1117, + "1070": 1118, + "1071": 1119, + "1072": 1120, + "1073": 1121, + "1074": 1122, + "1075": 1123, + "1076": 1124, + "1077": 1125, + "1078": 1126, + "1079": 1127, + "1080": 1128, + "1081": 1129, + "1082": 1130, + "1083": 1131, + "1084": 1132, + "1085": 1133, + "1086": 1134, + "1087": 1135, + "1088": 1136, + "1089": 1137, + "1090": 1138, + "1091": 1139, + "1092": 1140, + "1093": 1141, + "1094": 1142, + "1095": 1143, + "1096": 1144, + "1097": 1145, + "1098": 1146, + "1099": 1147, + "1100": 1148, + "1101": 1149, + "1102": 1150, + "1103": 1151, + "1104": 1152, + "1105": 1153, + "1106": 1154, + "1107": 1155, + "1108": 1156, + "1109": 1157, + "1110": 1158, + "1111": 1159, + "1112": 1160, + "1113": 1161, + "1114": 1162, + "1115": 1163, + "1116": 1164, + "1117": 1165, + "1118": 1166, + "1119": 1167, + "1120": 1168, + "1121": 1169, + "1122": 1170, + "1123": 1171, + "1124": 1172, + "1125": 1173, + "1126": 1174, + "1127": 1175, + "1128": 1176, + "1129": 1177, + "1130": 1178, + "1131": 1179, + "1132": 1180, + "1133": 1181, + "1134": 1182, + "1135": 1183, + "1136": 1184, + "1137": 1185, + "1138": 1186, + "1139": 1187, + "1140": 1189, + "1141": 1190, + "1142": 1191, + "1143": 1192, + "1144": 1193, + "1145": 1194, + "1146": 1195, + "1147": 1196, + "1148": 1197, + "1149": 1198, + "1150": 1199, + "1151": 1200, + "1152": 1201, + "1153": 1202, + "1154": 1203, + "1155": 1204, + "1156": 1205, + "1157": 1206, + "1158": 1207, + "1159": 1208, + "1160": 1209, + "1161": 1210, + "1162": 1211, + "1163": 1212, + "1164": 1213, + "1165": 1214, + "1166": 1215, + "1167": 1216, + "1168": 1217, + "1169": 1218, + "1170": 1219, + "1171": 1220, + "1172": 1221, + "1173": 1222, + "1174": 1223, + "1175": 1224, + "1176": 1225, + "1177": 1226, + "1178": 1227, + "1179": 1228, + "1180": 1229, + "1181": 1231, + "1182": 1232, + 
"1183": 1233, + "1184": 1234, + "1185": 1235, + "1186": 1236, + "1187": 1237, + "1188": 1238, + "1189": 1239, + "1190": 1240, + "1191": 1241, + "1192": 1242, + "1193": 1243, + "1194": 1244, + "1195": 1245, + "1196": 1246, + "1197": 1247, + "1198": 1248, + "1199": 1249, + "1200": 1250, + "1201": 1251, + "1202": 1252, + "1203": 1253, + "1204": 1254, + "1205": 1255, + "1206": 1256, + "1207": 1257, + "1208": 1258, + "1209": 1259, + "1210": 1260, + "1211": 1261, + "1212": 1262, + "1213": 1263, + "1214": 1264, + "1215": 1265, + "1216": 1266, + "1217": 1267, + "1218": 1268, + "1219": 1269, + "1220": 1270, + "1221": 1271, + "1222": 1272, + "1223": 1273, + "1224": 1274, + "1225": 1275, + "1226": 1276, + "1227": 1277, + "1228": 1278, + "1229": 1279, + "1230": 1280, + "1231": 1281, + "1232": 1282, + "1233": 1283, + "1234": 1284, + "1235": 1285, + "1236": 1286, + "1237": 1287, + "1238": 1288, + "1239": 1289, + "1240": 1290, + "1241": 1291, + "1242": 1292, + "1243": 1293, + "1244": 1294, + "1245": 1295, + "1246": 1296, + "1247": 1297, + "1248": 1298, + "1249": 1299, + "1250": 1301, + "1251": 1302, + "1252": 1303, + "1253": 1304, + "1254": 1305, + "1255": 1306, + "1256": 1307, + "1257": 1308, + "1258": 1309, + "1259": 1310, + "1260": 1311, + "1261": 1312, + "1262": 1313, + "1263": 1314, + "1264": 1315, + "1265": 1316, + "1266": 1317, + "1267": 1318, + "1268": 1319, + "1269": 1320, + "1270": 1321, + "1271": 1322, + "1272": 1323, + "1273": 1324, + "1274": 1325, + "1275": 1326, + "1276": 1327, + "1277": 1328, + "1278": 1329, + "1279": 1330, + "1280": 1331, + "1281": 1332, + "1282": 1333, + "1283": 1334, + "1284": 1335, + "1285": 1336, + "1286": 1337, + "1287": 1338, + "1288": 1339, + "1289": 1340, + "1290": 1341, + "1291": 1342, + "1292": 1343, + "1293": 1344, + "1294": 1345, + "1295": 1346, + "1296": 1347, + "1297": 1348, + "1298": 1349, + "1299": 1350, + "1300": 1351, + "1301": 1352, + "1302": 1353, + "1303": 1354, + "1304": 1355, + "1305": 1356, + "1306": 1357, + "1307": 1358, + "1308": 1359, + "1309": 1360, + "1310": 1361, + "1311": 1362, + "1312": 1363, + "1313": 1364, + "1314": 1365, + "1315": 1366, + "1316": 1367, + "1317": 1368, + "1318": 1369, + "1319": 1370, + "1320": 1371, + "1321": 1372, + "1322": 1373, + "1323": 1374, + "1324": 1375, + "1325": 1376, + "1326": 1377, + "1327": 1378, + "1328": 1379, + "1329": 1380, + "1330": 1381, + "1331": 1382, + "1332": 1383, + "1333": 1384, + "1334": 1385, + "1335": 1386, + "1336": 1387, + "1337": 1388, + "1338": 1389, + "1339": 1390, + "1340": 1391, + "1341": 1392, + "1342": 1393, + "1343": 1394, + "1344": 1395, + "1345": 1396, + "1346": 1397, + "1347": 1398, + "1348": 1399, + "1349": 1400, + "1350": 1401, + "1351": 1402, + "1352": 1403, + "1353": 1404, + "1354": 1405, + "1355": 1406, + "1356": 1407, + "1357": 1408, + "1358": 1409, + "1359": 1410, + "1360": 1411, + "1361": 1412, + "1362": 1413, + "1363": 1414, + "1364": 1415, + "1365": 1416, + "1366": 1418, + "1367": 1420, + "1368": 1421, + "1369": 1422, + "1370": 1423, + "1371": 1424, + "1372": 1425, + "1373": 1426, + "1374": 1427, + "1375": 1428, + "1376": 1429, + "1377": 1430, + "1378": 1431, + "1379": 1432, + "1380": 1433, + "1381": 1434, + "1382": 1435, + "1383": 1436, + "1384": 1437, + "1385": 1438, + "1386": 1439, + "1387": 1440, + "1388": 1441, + "1389": 1442, + "1390": 1443, + "1391": 1444, + "1392": 1445, + "1393": 1446, + "1394": 1447, + "1395": 1448, + "1396": 1449, + "1397": 1450, + "1398": 1451, + "1399": 1452, + "1400": 1453, + "1401": 1454, + "1402": 1455, + "1403": 1456, + "1404": 1457, + 
"1405": 1458, + "1406": 1459, + "1407": 1460, + "1408": 1461, + "1409": 1462, + "1410": 1463, + "1411": 1464, + "1412": 1465, + "1413": 1466, + "1414": 1467, + "1415": 1468, + "1416": 1469, + "1417": 1470, + "1418": 1471, + "1419": 1472, + "1420": 1473, + "1421": 1474, + "1422": 1475, + "1423": 1476, + "1424": 1477, + "1425": 1478, + "1426": 1479, + "1427": 1480, + "1428": 1481, + "1429": 1482, + "1430": 1483, + "1431": 1484, + "1432": 1485, + "1433": 1486, + "1434": 1487, + "1435": 1488, + "1436": 1489, + "1437": 1490, + "1438": 1491, + "1439": 1492, + "1440": 1493, + "1441": 1494, + "1442": 1495, + "1443": 1496, + "1444": 1497, + "1445": 1498, + "1446": 1499, + "1447": 1500, + "1448": 1501, + "1449": 1502, + "1450": 1503, + "1451": 1504, + "1452": 1505, + "1453": 1506, + "1454": 1507, + "1455": 1508, + "1456": 1509, + "1457": 1510, + "1458": 1511, + "1459": 1512, + "1460": 1513, + "1461": 1514, + "1462": 1515, + "1463": 1516, + "1464": 1517, + "1465": 1518, + "1466": 1519, + "1467": 1520, + "1468": 1521, + "1469": 1522, + "1470": 1523, + "1471": 1524, + "1472": 1525, + "1473": 1526, + "1474": 1527, + "1475": 1528, + "1476": 1529, + "1477": 1530, + "1478": 1531, + "1479": 1532, + "1480": 1533, + "1481": 1534, + "1482": 1535, + "1483": 1536, + "1484": 1537, + "1485": 1538, + "1486": 1539, + "1487": 1540, + "1488": 1541, + "1489": 1542, + "1490": 1543, + "1491": 1545, + "1492": 1547, + "1493": 1548, + "1494": 1549, + "1495": 1550, + "1496": 1551, + "1497": 1552, + "1498": 1553, + "1499": 1554, + "1500": 1555, + "1501": 1556, + "1502": 1557, + "1503": 1558, + "1504": 1559, + "1505": 1560, + "1506": 1561, + "1507": 1562, + "1508": 1563, + "1509": 1565, + "1510": 1566 +} \ No newline at end of file diff --git a/data/corpora/cg/debug/PMID-198130.txt b/data/corpora/cg/debug/PMID-198130.txt new file mode 100644 index 0000000..09918d5 --- /dev/null +++ b/data/corpora/cg/debug/PMID-198130.txt @@ -0,0 +1,6 @@ +Magnitude of malate - aspartate reduced nicotinamide adenine dinucleotide shuttle activity in intact respiring tumor cells . +Measurements of respiration , CO2 and lactate production , and changes in the levels of various key metabolites of the glycolytic sequence and tricarboxylic acid cycle were made on five lines of rodent ascites tumor cells ( two strains of Ehrlich ascites tumor cells , Krebs II carcinoma , AS - 30D carcinoma , and L1210 cells ) incubated aerobically in the presence of uniformly labeled D - [ 14C ] glucose . +From these data , as well as earlier evidence demonstrating that the reduced nicotinamide adenine dinucleotide ( NADH ) shuttle in these cells requires a transaminase step and is thus identified as the malate - aspartate shuttle ( W . V . V . Greenhouse and A . L . Lehninger , Cancer Res . , 36 : 1392 - 1396 , 1976 ) , metabolic flux diagrams were constructed for the five cell lines . +These diagrams show the relative rates of glycolysis , the tricarboxylic acid cycle , electron transport , and the malate - aspartate shuttle in these tumors . +Large amounts of cytosolic NADH were oxidized by the mitochondrial respiratory chain via the NADH shuttle , comprising anywhere from about 20 to 80 % of the total flow of reducing equivalents to oxygen in these tumors . +Calculations of the sources of energy for adenosine triphosphate synthesis indicated that on the average about one - third of the respiratory adenosine triphosphate is generated by electron flow originating from cytosolic NADH via the malate - aspartate shuttle . 
diff --git a/data/corpora/cg/debug/PMID-198130.txt.ori b/data/corpora/cg/debug/PMID-198130.txt.ori
new file mode 100644
index 0000000..87d7ea0
--- /dev/null
+++ b/data/corpora/cg/debug/PMID-198130.txt.ori
@@ -0,0 +1,2 @@
+Magnitude of malate-aspartate reduced nicotinamide adenine dinucleotide shuttle activity in intact respiring tumor cells.
+Measurements of respiration, CO2 and lactate production, and changes in the levels of various key metabolites of the glycolytic sequence and tricarboxylic acid cycle were made on five lines of rodent ascites tumor cells (two strains of Ehrlich ascites tumor cells, Krebs II carcinoma, AS-30D carcinoma, and L1210 cells) incubated aerobically in the presence of uniformly labeled D-[14C]glucose. From these data, as well as earlier evidence demonstrating that the reduced nicotinamide adenine dinucleotide (NADH) shuttle in these cells requires a transaminase step and is thus identified as the malate-aspartate shuttle (W.V.V. Greenhouse and A.L. Lehninger, Cancer Res., 36: 1392-1396, 1976), metabolic flux diagrams were constructed for the five cell lines. These diagrams show the relative rates of glycolysis, the tricarboxylic acid cycle, electron transport, and the malate-aspartate shuttle in these tumors. Large amounts of cytosolic NADH were oxidized by the mitochondrial respiratory chain via the NADH shuttle, comprising anywhere from about 20 to 80% of the total flow of reducing equivalents to oxygen in these tumors. Calculations of the sources of energy for adenosine triphosphate synthesis indicated that on the average about one-third of the respiratory adenosine triphosphate is generated by electron flow originating from cytosolic NADH via the malate-aspartate shuttle.

From dc8c5356dd69b1016f4c4a824384f7105ba6636f Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Mon, 21 Mar 2022 16:33:23 +0900
Subject: [PATCH 14/70] training model

---
 eval/evalEV.py              |  381 ++++-----
 eval/evalNER.py             |  210 +++++
 eval/evalRE.py              |  414 +++++++---
 eval/evaluate.py            |  413 +++++++++-
 eval/scripts/n2c2.py        | 1511 +++++++++++++++++++++++++++++++++++
 loader/prepData/brat.py     |  164 +++-
 loader/prepData/entity.py   |  132 ++-
 loader/prepData/event.py    |  270 +++++++
 loader/prepData/prepdata.py |   47 +-
 loader/prepData/relation.py |  118 +++
 loader/prepData/sentence.py |  105 ++-
 loader/prepNN/ent2net.py    |   41 +-
 loader/prepNN/ev2net.py     |  306 +++++++
 loader/prepNN/mapping.py    |  252 ++++++
 loader/prepNN/prep4nn.py    |   61 +-
 loader/prepNN/rel2net.py    |   23 +
 loader/prepNN/sent2net.py   |    6 +-
 loader/prepNN/span4nn.py    |   79 +-
 loader/prepNN/structure.py  |  283 +++++++
 {nets => model}/EVGen.py    |  223 +++++-
 {nets => model}/EVNet.py    |  209 +++--
 {nets => model}/NERNet.py   |  110 ++-
 model/RELNet.py             |  309 +++++++
 model/deepEM.py             |  484 +++++++++++
 model/training.py           |  225 ++++++
 nets/RELNet.py              |  147 ----
 nets/deepEM.py              |  298 ------
 train.py                    |  215 +++++
 28 files changed, 6089 insertions(+), 947 deletions(-)
 create mode 100644 eval/evalNER.py
 create mode 100644 eval/scripts/n2c2.py
 create mode 100644 loader/prepData/event.py
 create mode 100644 loader/prepData/relation.py
 create mode 100644 loader/prepNN/ev2net.py
 create mode 100644 loader/prepNN/mapping.py
 create mode 100644 loader/prepNN/rel2net.py
 create mode 100644 loader/prepNN/structure.py
 rename {nets => model}/EVGen.py (85%)
 rename {nets => model}/EVNet.py (80%)
 rename {nets => model}/NERNet.py (70%)
 create mode 100644 model/RELNet.py
 create mode 100644 model/deepEM.py
 create mode 100644 model/training.py
 delete mode 100644 nets/RELNet.py
 delete mode 100644 nets/deepEM.py
 create mode 100644 train.py
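The first file in the diff below, eval/evalEV.py, writes brat standoff (.a2) output with three line shapes: text-bound T lines, event E lines whose arguments are Role:Id pairs, and modality M lines. For orientation, a minimal sketch of a writer for those shapes; the IDs, types, and role names in the example are hypothetical CG-style placeholders, not taken from this patch:

```python
def write_a2(path, triggers, events, modalities):
    """Write brat .a2 standoff lines in the three shapes emitted by write_ev_2file.

    triggers:   list of (id, type, start, end, text) tuples
    events:     list of (id, type, trigger_id, [(role, arg_id), ...]) tuples
    modalities: list of (id, value, event_id) tuples
    """
    with open(path, "w") as out:
        # text-bound annotations: "T1<TAB>Type start end<TAB>text"
        for tid, ttype, start, end, text in triggers:
            out.write("{}\t{} {} {}\t{}\n".format(tid, ttype, start, end, text))
        # events: "E1<TAB>Type:T1 Role1:Id1 Role2:Id2 ..."
        for eid, etype, tid, args in events:
            arg_str = "".join(" {}:{}".format(role, aid) for role, aid in args)
            out.write("{}\t{}:{}{}\n".format(eid, etype, tid, arg_str))
        # modality: "M1<TAB>Value E1"
        for mid, value, eid in modalities:
            out.write("{}\t{} {}\n".format(mid, value, eid))

# Illustrative call (types and roles are hypothetical):
write_a2("PMID-198130.a2",
         triggers=[("T10", "Gene_expression", 0, 9, "induction")],
         events=[("E1", "Gene_expression", "T10", [("Theme", "T3")])],
         modalities=[("M1", "Speculation", "E1")])
```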
diff --git a/eval/evalEV.py b/eval/evalEV.py
index 5b6725c..7590b4a 100644
--- a/eval/evalEV.py
+++ b/eval/evalEV.py
@@ -1,6 +1,8 @@
 import collections
 import os
 
+from loguru import logger
+
 
 def get_entity_attrs(e_idx, words, offset, span_indices, sub_to_words):
     e_span_indice = span_indices[e_idx]
@@ -67,6 +69,7 @@ def generate_events(fids, all_ev_preds, params):
 
     # accumulated event numbers to count event id
     acc_evid = 0
+    # ev_count = 0
 
     # store event ids
     evids_ = collections.OrderedDict()
@@ -81,8 +84,15 @@ def generate_events(fids, all_ev_preds, params):
 
             # set event id
             ev_id = xx1 + acc_evid
+            # if level == 0:
+            #     ev_id = xx1
+            #     ev_id = acc_evid + xx1
+            # else:
+            #     ev_id = level * len(ev_preds_levels_[level-1]) + xx1
+            #     ev_id = acc_evid + xx1
 
             ev_id_str = (str(xi) + '_' + str(ev_id))
+            # ev_id_ = (xi, ev_id)
 
             # store evid for nested events
             evids_[(level, xx1)] = ev_id_str
@@ -103,9 +113,15 @@
             ev_data.append((xi, (trid[0], trid[1])))
 
             # get relation structure
+            # rel_struct_counter = rel_struct_[0]
             rel_struct_list = rel_struct_[1]
 
+            # check no-argument
+            # if len(a2ids) == 0:
+            #     continue
+
             # has argument
+            # if len(a2ids) > 0:
             if len(rel_struct_list) > 0:
 
                 # store args_data
@@ -116,6 +132,8 @@
 
                 for argid, a2id in enumerate(a2ids):
 
+                    # print(argid, rel_struct_list, rel_struct_counter, trid, a2ids)
+
                     # get relation type id
                     rel_group = rel_struct_list[argid]  # (rtypeid, argtypeid)
                     rtypeid = rel_group[0]
@@ -127,12 +145,17 @@
                     else:
                         dup_rtypes[rtypeid] += 1
 
+                    # create id for a2
+
+                    # check whether this is entity or event argument
+                    # event argument
                     if level > 0 and len(a2id) > 2:
-
+                        # evlevel = a2id[1]
+                        # evxx1 = a2id[2]
                         evlevel_id = a2id[2]
 
                         # look up in the event ids list
+                        # added_evid = evids_[(evlevel, evxx1)]
                         added_evid = evids_[evlevel_id]
                         a2bid = (added_evid, -1, -1)  # add -1 to mark the event argument
@@ -292,253 +315,105 @@ def convert_evid_to_number(str_evid):
     return int(evid[0] + evid[1])
 
 
-def mapping_entity_id(en_preds_, g_entity_ids_, rev_type_map, params):
-    # if gold entity, starting trigger id from max entity id + 1
-    if not params['ner_predict_all'] and len(g_entity_ids_) > 0:
-        eid = g_entity_ids_[-1] + 1
-
-    # predict both entity and trigger
-    else:
-        eid = 1
-
-    # mapping
-    enid_mapping = collections.OrderedDict()
-    en_preds_out_ = []
-
-    # entity in a2
-    a2_ents_ = []
-
-    # create mapping for entity id first
-    for pr_id, en_pred in en_preds_.items():
-
-        # id
-        en_id = en_pred[0]
-
-        if en_id.startswith('TR'):
-            continue
-
-        elif en_id.startswith('T'):
-            enid_mapping[en_id] = 'T' + str(eid)
-            eid += 1
-            en_preds_out_.append(en_pred)
-
-            # using gold entity but in a2
-            if not params['ner_predict_all']:
-                etype = rev_type_map[en_pred[1]]
-
-                # check entity type in a2
-                if etype in params['a2_entities']:
-                    a2_ents_.append(en_id)
-
-    # creat mapping for trigger id
-    for pr_id, en_pred in en_preds_.items():
-        # id
-        en_id = en_pred[0]
-
-        if en_id.startswith('TR'):
-            enid_mapping[en_id] = 'T' + str(eid)
-            eid += 1
-            en_preds_out_.append(en_pred)
-
-    return enid_mapping, en_preds_out_, a2_ents_
-
-
 # write events to file
-def write_ev_2file(pred_output, pred_ents, result_dir, g_entity_ids_, params):
-    a2dir = result_dir + 'ev-last/ev-tok-a2/'
-    anndir = result_dir + 'ev-last/ev-tok-ann/'
+def write_ev_2file(pred_output, result_dir, params):
 
     rev_type_map = 
params['mappings']['rev_type_map'] - # entity id mapping - # feid_mapping = collections.OrderedDict() - - if not os.path.exists(a2dir): - os.makedirs(a2dir) + dir2wr = result_dir + 'ev-last/ev-ann/' + if not os.path.exists(dir2wr): + os.makedirs(dir2wr) else: - os.system('rm ' + a2dir + '*.a2') + os.system('rm ' + dir2wr + '*.a2') - if not os.path.exists(anndir): - os.makedirs(anndir) - else: - os.system('rm ' + anndir + '*.a2') - os.system('rm ' + anndir + '*.a1') - - # write event and triggers, (and entity: if predict both entity and trigger) for fid, preds in pred_output.items(): - ev_en_preds_ = preds[0] + triggers = preds[0] events = preds[1] - enid_mapping, en_preds_out_, a2_ents_ = mapping_entity_id(pred_ents[fid], g_entity_ids_[fid], rev_type_map, params) - - # entity and trigger for ann file - ann_en_lines = [] - ann_tr_lines = [] - # write entity and trigger from entity predictions - for pr_id, e_pred in pred_ents[fid].items(): - e0_id = e_pred[0] - e_id = enid_mapping[e0_id] - - output = ''.join( - [e_id, '\t', rev_type_map[e_pred[1]], ' ', str(e_pred[2][0]), ' ', str(e_pred[2][1]), '\t', - e_pred[3], '\n']) + with open(dir2wr + fid + '.a2', 'w') as o2file: - if e0_id.startswith('TR'): - ann_tr_lines.append(output) + for trigger in triggers: + o2file.write(trigger[0].replace('TR', 'T') + '\t' + rev_type_map[trigger[1]] + ' ' + + str(trigger[2][0]) + ' ' + str(trigger[2][1]) + '\t' + trigger[3] + '\n') - # only write entity to a1 - elif e0_id.startswith('T'): - - # entity in a2 - if e0_id in a2_ents_: - ann_tr_lines.append(output) - - else: - ann_en_lines.append(output) + # count event id + f_evid = 0 + # mapping event id to incremental id + f_evid_map = collections.OrderedDict() - # entity and trigger output for a2 - a2_en_lines_ = [] - a2_tr_lines_ = [] + # store modality + mod_list = [] - # write entity and trigger only included event predictions - # write entity and then trigger - for e_pred in ev_en_preds_: - e0_id = e_pred[0] - e_id = enid_mapping[e0_id] + for event_ in events: - output = ''.join( - [e_id, '\t', rev_type_map[e_pred[1]], ' ', str(e_pred[2][0]), ' ', str(e_pred[2][1]), '\t', - e_pred[3], '\n']) - - if e0_id.startswith('TR'): - a2_tr_lines_.append(output) - elif e0_id.startswith('T'): - - # entity in a2 - if e0_id in a2_ents_: - a2_tr_lines_.append(output) + # create event id + evid = convert_evid_to_number(event_[0]) + # lookup in the map or create a new id + if evid in f_evid_map: + evid_out = f_evid_map[evid] else: - a2_en_lines_.append(output) - - # event output - ev_lines = [] - - # count event id - f_evid = 0 - - # mapping event id to incremental id - f_evid_map = collections.OrderedDict() - - # store modality - mod_list = [] - - for event_ in events: - - # create event id - evid = convert_evid_to_number(event_[0]) + f_evid += 1 + evid_out = f_evid + f_evid_map[evid] = evid_out + + idTR = event_[1][0].replace('TR', 'T') + typeEV = rev_type_map[event_[1][1]] + args_data = event_[2] + mod_pred = event_[3] + + args_output = '' + for arg_ in args_data: + + # relation type + typeR = arg_[0] + + # check event or entity argument + if len(arg_) > 2: + argIdE = arg_[1] + nest_evid = convert_evid_to_number(argIdE) + if nest_evid in f_evid_map: + nest_evid_out = f_evid_map[nest_evid] + idT = 'E' + str(nest_evid_out) + else: + print('ERROR: NESTED EVENT BUT MISSING EVENT ARGUMENT.') - # lookup in the map or create a new id - if evid in f_evid_map: - evid_out = f_evid_map[evid] - else: - f_evid += 1 - evid_out = f_evid - f_evid_map[evid] = evid_out - - trid = 
event_[1][0] - trid = enid_mapping[trid] - typeEV = rev_type_map[event_[1][1]] - args_data = event_[2] - mod_pred = event_[3] - - args_output = '' - for arg_ in args_data: - - # relation type - typeR = arg_[0] - - # check event or entity argument - if len(arg_) > 2: - argIdE = arg_[1] - nest_evid = convert_evid_to_number(argIdE) - if nest_evid in f_evid_map: - nest_evid_out = f_evid_map[nest_evid] - eid = 'E' + str(nest_evid_out) + # entity argument else: - print('ERROR: NESTED EVENT BUT MISSING EVENT ARGUMENT.') + a2data = arg_[1] + idT = a2data[0].replace('TR', 'T') - # entity argument - else: - a2data = arg_[1] - eid = a2data[0] + if len(args_output) > 0: + args_output += ' ' - # mapping entity id: predict entity or entity in a2 - if params['ner_predict_all'] or eid in a2_ents_: - eid = enid_mapping[eid] + args_output += typeR + ':' + idT + # if has argument if len(args_output) > 0: - args_output += ' ' + o2file.write('E' + str(evid_out) + '\t' + typeEV + ':' + idTR + ' ' + args_output + '\n') - args_output += typeR + ':' + eid + # no argument + else: + o2file.write('E' + str(evid_out) + '\t' + typeEV + ':' + idTR + '\n') - # if has argument - if len(args_output) > 0: - output = ''.join(['E', str(evid_out), '\t', typeEV, ':', trid, ' ', args_output, '\n']) - ev_lines.append(output) + # check and store modality + if mod_pred > 1: + mod_value = params['mappings']['rev_modality_map'][mod_pred] + mod_list.append([mod_value, evid_out]) - # no argument - else: - output = ''.join(['E', str(evid_out), '\t', typeEV, ':', trid, '\n']) - ev_lines.append(output) - - # check and store modality - if mod_pred > 1: - mod_value = params['mappings']['rev_modality_map'][mod_pred] - mod_list.append([mod_value, evid_out]) - - # write modality - if len(mod_list) > 0: - for mod_id, mod_data in enumerate(mod_list): - mod_type = mod_data[0] - evid_out = mod_data[1] - output = ''.join(['M', str(mod_id + 1), '\t', mod_type, ' ', 'E', str(evid_out), '\n']) - ev_lines.append(output) - - # write a2 files - with open(a2dir + fid + '.a2', 'w') as o2file: - - # write entity - if params['ner_predict_all']: - for entity in a2_en_lines_: - o2file.write(entity) - - for trigger in a2_tr_lines_: - o2file.write(trigger) - for event in ev_lines: - o2file.write(event) - - # write ann file - with open(anndir + fid + '.a1', 'w') as o1file: - for entity in ann_en_lines: - o1file.write(entity) - - with open(anndir + fid + '.a2', 'w') as annfile: - for entity in ann_en_lines: - annfile.write(entity) - for trigger in ann_tr_lines: - annfile.write(trigger) - - # events are the same for both a2 and ann - for event in ev_lines: - annfile.write(event) + # write modality + if len(mod_list) > 0: + for mod_id, mod_data in enumerate(mod_list): + mod_type = mod_data[0] + evid_out = mod_data[1] + o2file.write('M' + str(mod_id + 1) + '\t' + mod_type + ' ' + 'E' + str(evid_out) + '\n') return # generate event output and evaluation -def write_events(fids, all_ent_preds, all_words, all_offsets, all_span_terms, all_span_indices, all_sub_to_words, - all_ev_preds, g_entity_ids_, params, result_dir): +def evaluate_ev(fids, all_ent_preds, all_words, all_offsets, all_span_terms, all_span_indices, all_sub_to_words, + all_ev_preds, params, gold_dir, result_dir): # generate predicted entities pred_ents = generate_entities(fids=fids, all_e_preds=all_ent_preds, @@ -558,6 +433,70 @@ def write_events(fids, all_ent_preds, all_words, all_offsets, all_span_terms, al preds_output = generate_ev_output(pred_ents, pred_evs, params) # write output to file - 
write_ev_2file(preds_output, pred_ents, result_dir, g_entity_ids_, params) - - return + _ = write_ev_2file(preds_output, result_dir, params) + + # calculate score + ev_scores = eval_performance(gold_dir, result_dir, params) + + return ev_scores + + +def eval_performance(ref_dir, result_dir, params): + # create prediction paths + pred_dir = ''.join([result_dir, 'ev-last/ev-ann/']) + pred_scores_file = ''.join([result_dir, 'ev-last/', 'ev-scores-', params['task_name'], params['ev_matching'], '.txt']) + + try: + + command = ''.join( + ["python " + params['ev_eval_script_path'], " -r ", ref_dir, " -d ", pred_dir, " ", params['ev_matching'], + " > ", pred_scores_file]) + + # exception for ezcat task + if 'ezcat' in params['task_name']: + command = ''.join( + ["python " + params['ev_eval_script_path'], " -r ", ref_dir, " ", pred_dir, " ", + params['ev_matching'], + " > ", pred_scores_file]) + + os.system(command) + ev_scores = extract_fscore(pred_scores_file) + except Exception as ex: + ev_scores = {} + logger.exception(ex) + + return ev_scores + + +def extract_fscore(path): + file = open(path, 'r') + lines = file.readlines() + sub_fscore = '0' + sub_recall = '0' + sub_precision = '0' + mod_fscore = '0' + mod_recall = '0' + mod_precision = '0' + tot_fscore = '0' + tot_recall = '0' + tot_precision = '0' + for line in lines: + if line.split()[0] == '===[SUB-TOTAL]===': + tokens = line.split() + sub_recall = tokens[-3] + sub_precision = tokens[-2] + sub_fscore = tokens[-1] + elif line.split()[0] == '==[MOD-TOTAL]==': + tokens = line.split() + mod_recall = tokens[-3] + mod_precision = tokens[-2] + mod_fscore = tokens[-1] + elif line.split()[0] == '====[TOTAL]====': + tokens = line.split() + tot_recall = tokens[-3] + tot_precision = tokens[-2] + tot_fscore = tokens[-1] + + return {'sub_scores': (float(sub_precision.strip()), float(sub_recall.strip()), float(sub_fscore.strip())), + 'mod_scores': (float(mod_precision.strip()), float(mod_recall.strip()), float(mod_fscore.strip())), + 'tot_scores': (float(tot_precision.strip()), float(tot_recall.strip()), float(tot_fscore.strip()))} diff --git a/eval/evalNER.py b/eval/evalNER.py new file mode 100644 index 0000000..c26ed0a --- /dev/null +++ b/eval/evalNER.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- +# © Khoa Duong (dnanhkhoa@live.com) +from collections import defaultdict + +import texttable + + +def precision(tp=0, fp=0): + if tp + fp: + return tp / (tp + fp) + return 0.0 + + +def recall(tp=0, fn=0): + if tp + fn: + return tp / (tp + fn) + return 0.0 + + +def f_score(precision, recall, beta=1.0): + """ + The beta parameter determines the weight of precision in the combined score. beta < 1 lends more weight to precision, while beta > 1 favors recall (beta -> 0 considers only precision, beta -> inf only recall). 
+ """ + numerator = (1 + beta ** 2) * precision * recall + denominator = beta ** 2 * precision + recall + if denominator: + return numerator / denominator + return 0.0 + + +def specificity(tn=0, fp=0): + # FPR + if tn + fp: + return tn / (tn + fp) + return 0.0 + + +def sensitivity(tp=0, fn=0): + # TPR + return recall(tp, fn) + + +def auc(specificity, sensitivity): + return (specificity + sensitivity) / 2 + + +def measure(tp=0, tn=0, fp=0, fn=0, beta=1.0, lenient=False): + if lenient and tp + fn == 0: + tp, tn, fp, fn = 1, 1, 0, 0 + + _precision = precision(tp, fp) + _recall = recall(tp, fn) + _f_score = f_score(_precision, _recall, beta) + _specificity = specificity(tn, fp) + _sensitivity = sensitivity(tp, fn) + _auc = auc(_specificity, _sensitivity) + + return { + "precision": _precision * 100, + "recall": _recall * 100, + "f_score": _f_score * 100, + "specificity": _specificity * 100, + "sensitivity": _sensitivity * 100, + "auc": _auc * 100, + } + + +def count(pred_entities, gold_entities, label): + assert label, "Label is invalid" + + # Remove duplicates + pred_entities = {e: True for e in pred_entities} + gold_entities = {e: True for e in gold_entities} + + positions = {**pred_entities, **gold_entities} + + padded_pred_entities, padded_gold_entities = [], [] + + for k in positions: + if k in pred_entities and k[-1] == label: + padded_pred_entities.append(k[-1]) + else: + padded_pred_entities.append(None) + + if k in gold_entities and k[-1] == label: + padded_gold_entities.append(k[-1]) + else: + padded_gold_entities.append(None) + + matches = list(zip(padded_pred_entities, padded_gold_entities)) + + return { + "tp": matches.count((label, label)), + "tn": matches.count((None, None)), + "fp": matches.count((label, None)), + "fn": matches.count((None, label)), + } + + +def eval_nner(preds, golds, labels, beta=1.0, lenient=False): + num_pred_sentences = len(preds) + num_gold_sentences = len(golds) + assert num_pred_sentences == num_gold_sentences + + all_scores = [] + counts = [defaultdict(int) for _ in labels] + + for sentence_id in range(num_gold_sentences): + pred_entities = set(preds[sentence_id]) + gold_entities = set(golds[sentence_id]) + + for label_id, label in enumerate(labels): + for k, v in count(pred_entities, gold_entities, label).items(): + counts[label_id][k] += v + + tt = texttable.Texttable() + tt.set_cols_width([28] + [10] * 6 + [10] * 3) + tt.set_cols_dtype(["t", "f", "f", "f", "f", "f", "f", "i", "i", "i"]) + tt.set_cols_align(["l"] * 10) + tt.header( + [ + "Labels", + "Prec.", + "Rec.", + "F(b={})".format(beta), + "Speci.", + "Sensi.", + "AUC", + "Pred.", + "Gold.", + "Corr.", + ] + ) + + total_counts = defaultdict(int) + + for label_id, label in enumerate(labels): + score = measure( + counts[label_id]["tp"], + counts[label_id]["tn"], + counts[label_id]["fp"], + counts[label_id]["fn"], + beta, + lenient, + ) + + total_counts["tp"] += counts[label_id]["tp"] + total_counts["tn"] += counts[label_id]["tn"] + total_counts["fp"] += counts[label_id]["fp"] + total_counts["fn"] += counts[label_id]["fn"] + + all_scores.append( + [ + label, + score["precision"], + score["recall"], + score["f_score"], + score["specificity"], + score["sensitivity"], + score["auc"], + counts[label_id]["tp"] + counts[label_id]["fp"], + counts[label_id]["tp"] + counts[label_id]["fn"], + counts[label_id]["tp"], + ] + ) + tt.add_row(all_scores[-1]) + + score = measure( + total_counts["tp"], + total_counts["tn"], + total_counts["fp"], + total_counts["fn"], + beta, + lenient, + ) + + all_scores.append( + 
[ + "Overall", + score["precision"], + score["recall"], + score["f_score"], + score["specificity"], + score["sensitivity"], + score["auc"], + total_counts["tp"] + total_counts["fp"], + total_counts["tp"] + total_counts["fn"], + total_counts["tp"], + ] + ) + tt.add_row(all_scores[-1]) + + return tt.draw(), all_scores + + +if __name__ == "__main__": + labels = ["A", "B", "C", "D", "E"] # DO NOT INCLUDE "O" + golds = [ + [(0, 2, "E"), (0, 2, "A"), (1, 3, "B"), (4, 6, "D")], + [(0, 1, "C"), (3, 4, "A")], + [(2, 3, "B"), (3, 4, "A"), (4, 5, "C"), (4, 5, "A")], + ] + preds = [ + [(0, 2, "E"), (0, 2, "A"), (1, 3, "C"), (4, 6, "D"), (4, 6, "D")], + [(0, 1, "C"), (3, 4, "A")], + [(2, 3, "B"), (3, 4, "A"), (4, 5, "D"), (4, 5, "A")], + ] + res, score = eval_nner(preds, golds, labels) + print(res) + print(score) diff --git a/eval/evalRE.py b/eval/evalRE.py index e46d2cd..ba3c077 100644 --- a/eval/evalRE.py +++ b/eval/evalRE.py @@ -1,11 +1,54 @@ -import torch import os -import collections from collections import defaultdict +import numpy as np +import torch +from tabulate import tabulate + from utils.utils import write_lines +class MeasureStatistics: + """ + Calculate: True Positives (TP), False Positives (FP), False Negatives (FN) + GPU & CPU code + """ + + def __init__(self, params, beta): + self.params = params + self.beta = beta + + def __call__(self, *inputs): + label_num = self.params['voc_sizes']['rel_size'] + ignore_label = self.params['lab2ign_id'] + y, t = inputs + + if label_num is None: + label_num = torch.max(t) + 1 + else: + label_num = torch.tensor(label_num) + + mask_t = (t == ignore_label).view(-1) # where the ground truth needs to be ignored + true = torch.where(mask_t, label_num, t.view(-1)) # t: ground truth labels (replace ignored with 13) + mask_p = (y == ignore_label).view(-1) # where the predicted needs to be ignored + pred = torch.where(mask_p, label_num, y.view(-1)) # y: output of neural network (replace ignored with 13) + + tp_mask = torch.where(pred == true, true, label_num) + fp_mask = torch.where(pred != true, pred, label_num) + fn_mask = torch.where(pred != true, true, label_num) + + try: + tp = torch.bincount(tp_mask, minlength=label_num + 1)[:label_num] + fp = torch.bincount(fp_mask, minlength=label_num + 1)[:label_num] + fn = torch.bincount(fn_mask, minlength=label_num + 1)[:label_num] + except: + tp = torch.zeros(label_num) + fp = torch.zeros(label_num) + fn = torch.zeros(label_num) + + return tp, fp, fn + + class SelectClass: """ Correct predictions: From 2 direction relations choose @@ -24,18 +67,40 @@ def __call__(self, *inputs): ignore = torch.tensor(self.params['lab2ign_id']) cpu_device = torch.device("cpu") - y_lr, y_rl = inputs + y_lr, y_rl, truth_lr, truth_rl = inputs y_lr = y_lr.to(cpu_device) y_rl = y_rl.to(cpu_device) if self.params['fp16']: y_lr = y_lr.float() y_rl = y_rl.float() + truth_lr = torch.tensor(truth_lr).long() + truth_rl = torch.tensor(truth_rl).long() + + no_rel_matched_indices = 0 + no_rel_matched_types = 0 + + try: + labels_lr = y_lr.argmax(dim=1).view(-1) + labels_rl = y_rl.argmax(dim=1).view(-1) + except: + return truth_lr, truth_lr, {'no_rel_matched_indices': no_rel_matched_indices, + 'no_rel_matched_types': no_rel_matched_types} + m = torch.arange(labels_lr.shape[0]) - labels_lr = y_lr.argmax(dim=1).view(-1) - labels_rl = y_rl.argmax(dim=1).view(-1) + # count rel matched indices / types + if not self.params['predict']: + lr_ids = (truth_lr != -1).nonzero().transpose(0, 1) + rl_ids = (truth_rl != -1).nonzero().transpose(0, 1) - m = 
torch.arange(labels_lr.shape[0]) + no_rel_matched_indices += (lr_ids.shape[1] + rl_ids.shape[1]) + + lr_rel_matched_types = labels_lr[lr_ids] - truth_lr[lr_ids] + rl_rel_matched_types = labels_rl[rl_ids] - truth_rl[rl_ids] + + no_rel_matched_types += ( + (lr_rel_matched_types == 0).nonzero().shape[0] + (rl_rel_matched_types == 0).nonzero().shape[0]) + # split predictions into 2 arrays: relations + inv-relations lr_probs = y_lr[m, labels_lr] rl_probs = y_rl[m, labels_rl] inv_lr = labmap[labels_lr] @@ -53,14 +118,20 @@ def __call__(self, *inputs): rl_probs, negative_val.float()) a_x4 = torch.where((a4 >= a5) & (a4 != -1) & (a5 != -1), labels_lr, negative_val) a_x5 = torch.where((a4 < a5) & (a4 != -1) & (a5 != -1), inv_rl, negative_val) + # a_x5 = torch.where((a4 < a5) & (a4 != -1) & (a5 != -1), labels_rl, negative_val) # # if both are positive with inverse 1:rel:2 & 2:rel:1 (this is correct) --> keep them the 'rel' label a_x6 = torch.where((labels_lr != labels_rl) & (labels_lr != ignore) & (labels_rl != ignore) & (inv_lr == labels_rl), labels_lr, negative_val) + # If we don't care LR, we don't need a_x6 + # a_x6 = torch.where((labels_lr != labels_rl) & (labels_lr != ignore) & + # (labels_rl != ignore) & (inv_even == odd_labels), even_labels, negative_val_long) # if one positive & one negative --> choose the positive class a_x2 = torch.where((labels_lr != labels_rl) & (labels_lr == ignore) & (labels_rl != ignore), inv_rl, negative_val) + # a_x2 = torch.where((labels_lr != labels_rl) & (labels_lr == ignore) & (labels_rl != ignore), + # labels_rl, negative_val) a_x3 = torch.where((labels_lr != labels_rl) & (labels_lr != ignore) & (labels_rl == ignore), labels_lr, negative_val) @@ -71,24 +142,149 @@ def __call__(self, *inputs): a8 = torch.where( (labels_lr != ignore) & (labels_rl != ignore) & (labels_lr != labels_rl) & (inv_lr != labels_rl), rl_probs, negative_val.float()) - + # a7 = torch.where((labels_lr != ignore) & (labels_rl != ignore) & (labels_lr != labels_rl), lr_probs, + # negative_val.float()) + # a8 = torch.where((labels_lr != ignore) & (labels_rl != ignore) & (labels_lr != labels_rl), rl_probs, + # negative_val.float()) a_x7 = torch.where((a7 >= a8) & (a7 != -1) & (a8 != -1), labels_lr, negative_val) a_x8 = torch.where((a7 < a8) & (a7 != -1) & (a8 != -1), inv_rl, negative_val) + # a_x8 = torch.where((a7 < a8) & (a7 != -1) & (a8 != -1), labels_rl, negative_val) fin = torch.stack([a_x1, a_x2, a_x3, a_x4, a_x5, a_x6, a_x7, a_x8]) + # fin = torch.stack([a_x1, a_x2, a_x3, a_x4, a_x5, a_x7, a_x8]) assert (torch.sum(torch.clamp(fin, min=-1.0, max=0.0), dim=0) == -7).all(), "check evaluation" + # assert (torch.sum(torch.clamp(fin, min=-1.0, max=0.0), dim=0) == -6).all(), "check evaluation" fin_preds = torch.max(fin, dim=0) + fin_truth = truth_lr + + return fin_preds[0], fin_truth, {'no_rel_matched_indices': no_rel_matched_indices, + 'no_rel_matched_types': no_rel_matched_types} - return fin_preds[0] +def calc_stats(preds, ts, params): + new_preds, new_ts, no_matched_rels = SelectClass(params)(preds[0], preds[1], ts[0], ts[1]) + tp_, fp_, fn_ = MeasureStatistics(params, 1.0)(new_preds, new_ts) + return new_preds, new_ts, no_matched_rels, tp_, fp_, fn_ -def calc_stats(preds, params): - new_preds = SelectClass(params)(preds[0], preds[1]) - return new_preds +def fbeta_score(precision, recall, beta=1.0): + beta_square = beta * beta + if (precision != 0.0) and (recall != 0.0): + res = ((1 + beta_square) * precision * recall / (beta_square * precision + recall)).astype(precision.dtype) + else: + 
res = 0.0 + return res + + +def estimate_perf(all_tp, all_fp, all_fn, params): + """ + Estimate performance: micro and macro average precision, recall, F1 score. + CPU - based + """ + lab_map = params['lab_map'] + class_size = params['voc_sizes']['rel_size'] + lab2ign = params['lab2ign_id'] + + all_tp = np.sum(all_tp, axis=0) + all_fp = np.sum(all_fp, axis=0) + all_fn = np.sum(all_fn, axis=0) + atp = np.sum(all_tp) + afp = np.sum(all_fp) + afn = np.sum(all_fn) + micro_p = (1.0 * atp) / (atp + afp) if (atp + afp != 0) else 0.0 + micro_r = (1.0 * atp) / (atp + afn) if (atp + afn != 0) else 0.0 + micro_f = fbeta_score(micro_p, micro_r) + + # macro (merge directions l2r+r2l) + ctp = [] + cfp = [] + cfn = [] + seen = [] + + # Without L R distinguishing + # for i in range(0, class_size): + # if i == lab2ign: # don't include other class + # continue + # elif (i in seen): + # continue + # else: + # ctp.append(all_tp[i]) + # cfp.append(all_fp[i]) + # cfn.append(all_fn[i]) + # seen.append(i) + + # With L R distinguishing + for i in range(0, class_size): + if i == lab2ign: # don't include other class + continue + elif (i in seen) or (lab_map[i] in seen): + continue + else: + ctp.append(all_tp[i] + all_tp[lab_map[i]]) + cfp.append(all_fp[i] + all_fp[lab_map[i]]) + cfn.append(all_fn[i] + all_fn[lab_map[i]]) + seen.append(i) + seen.append(lab_map[i]) + + pp = [] + rr = [] + ff = [] + for j in range(0, len(ctp)): + pp.append((1.0 * ctp[j]) / (ctp[j] + cfp[j]) if (ctp[j] + cfp[j]) != 0 else 0.0) + rr.append((1.0 * ctp[j]) / (ctp[j] + cfn[j]) if (ctp[j] + cfn[j]) != 0 else 0.0) + ff.append(fbeta_score(pp[j], rr[j])) + assert len(pp) == len(rr) == len(ff) + + # show performance on each class + if params['show_macro']: + gg = [ii for ii in range(0, class_size) if ii % 2 == 0][:-1] + lab_val = [] + for i in range(0, len(pp)): + lab_val.append([params['mappings']['rev_rel_map'][gg[i]].split(':')[1], pp[i], rr[i], ff[i]]) + print(tabulate(lab_val, headers=['Class', 'P', 'R', 'F1'], tablefmt='orgtbl')) + + macro_p = np.mean(pp) + macro_r = np.mean(rr) + macro_f = np.mean(ff) + return {'micro_p': micro_p, 'micro_r': micro_r, 'micro_f': micro_f, + 'macro_p': macro_p, 'macro_r': macro_r, 'macro_f': macro_f} + + +def get_entity_attrs(e_span_indice, words, offsets, sub_to_words): + e_words = [] + e_offset = [-1, -1] + curr_word_idx = -1 + for idx in range(e_span_indice[0], e_span_indice[1] + 1): + if sub_to_words[idx] != curr_word_idx: + e_words.append(words[sub_to_words[idx]]) + curr_word_idx = sub_to_words[idx] + if idx == e_span_indice[0]: + e_offset[0] = offsets[sub_to_words[idx]][0] + if idx == e_span_indice[1]: + e_offset[1] = offsets[sub_to_words[idx]][1] + return ' '.join(e_words), (e_offset[0], e_offset[1]) -def write_entity_relations(result_dir, fidss, ent_anns, rel_anns, params): - # def gen_annotation(fidss, ent_anns, rel_anns, params, result_dir): + +def estimate_rel(ref_dir, result_dir, fids, ent_anns, rel_anns, params): + """Evaluate entity and relation performance using n2c2 script""" + + # generate brat prediction + gen_annotation(fids, ent_anns, rel_anns, params, result_dir) + + # calculate scores + pred_dir = ''.join([result_dir, 'rel-last/rel-ann/']) + pred_scores_file = ''.join([result_dir, 'rel-last/rel-scores-', params['ner_eval_corpus'], '.txt']) + + # run evaluation, output in the score file + eval_performance(ref_dir, pred_dir, result_dir, pred_scores_file, params) + + # extract scores + scores = extract_fscore(pred_scores_file) + + return scores + + +def gen_annotation(fidss, ent_anns, 
rel_anns, params, result_dir): """Generate entity and relation prediction""" dir2wr = ''.join([result_dir, 'rel-last/rel-ann/']) @@ -99,7 +295,6 @@ def write_entity_relations(result_dir, fidss, ent_anns, rel_anns, params): # Initial ent+rel map map = defaultdict() - for fids in fidss: for fid in fids: map[fid] = {'ents': {}, 'rels': {}} @@ -116,11 +311,14 @@ def write_entity_relations(result_dir, fidss, ent_anns, rel_anns, params): sub_to_words = ent_ann['sub_to_words'][xb] entities = map[fid]['ents'] + # e_count = len(entities) + 1 for x, pair in enumerate(span_indices): if pair[0].item() == -1: break if ner_preds[x] > 0: + # e_id = 'T' + str(e_count) + # e_count += 1 try: e_id = ner_terms.id2term[x] e_type = params['mappings']['rev_type_map'][ @@ -133,12 +331,10 @@ def write_entity_relations(result_dir, fidss, ent_anns, rel_anns, params): e_words, e_offset = get_entity_attrs(pair, words, offsets, sub_to_words) else: e_words, e_offset = get_entity_attrs(pair, words, offsets, sub_to_words) - - # save entity map + # entity_map[(xb, (pair[0].item(), pair[1].item()))] = ( + # ner_preds[x], e_id, e_type, e_words, e_offset) entity_map[(xb, x)] = ( ner_preds[x], e_id, e_type, e_words, e_offset) - - # save entity dic info entities[e_id] = {"id": e_id, "type": e_type, "start": e_offset[0], "end": e_offset[1], "ref": e_words} except KeyError as error: @@ -147,7 +343,13 @@ def write_entity_relations(result_dir, fidss, ent_anns, rel_anns, params): # Mapping relations pairs_idx = rel_ann['pairs_idx'] rel_preds = rel_ann['rel_preds'] + # positive_indices = rel_ann['positive_indices'] + # if positive_indices: + # pairs_idx_i = pairs_idx[0][positive_indices] + # pairs_idx_j = pairs_idx[1][positive_indices] + # pairs_idx_k = pairs_idx[2][positive_indices] + # else: pairs_idx_i = pairs_idx[0] pairs_idx_j = pairs_idx[1] pairs_idx_k = pairs_idx[2] @@ -160,8 +362,12 @@ def write_entity_relations(result_dir, fidss, ent_anns, rel_anns, params): k = pairs_idx_k[x] rel = rel_preds[x].item() role = params['mappings']['rev_rel_map'][rel].split(":")[1] + # role = params['mappings']['rev_rtype_map'][rel] if role != 'Other': - + # arg1s = entity_map[ + # (i.item(), (ent_ann['span_indices'][i][j][0].item(), ent_ann['span_indices'][i][j][1].item()))] + # arg2s = entity_map[ + # (i.item(), (ent_ann['span_indices'][i][k][0].item(), ent_ann['span_indices'][i][k][1].item()))] try: arg1s = entity_map[(i.item(), j.item())] arg2s = entity_map[(i.item(), k.item())] @@ -181,78 +387,37 @@ def write_entity_relations(result_dir, fidss, ent_anns, rel_anns, params): except KeyError as error: print('error relation', fids[i], error) - for fid, ners_rels in map.items(): - write_annotation_file(dir2wr, fid, entities=ners_rels['ents'], - relations=ners_rels['rels']) - - -def get_entity_attrs(e_span_indice, words, offsets, sub_to_words): - e_words = [] - e_offset = [-1, -1] - curr_word_idx = -1 - for idx in range(e_span_indice[0], e_span_indice[1] + 1): - if sub_to_words[idx] != curr_word_idx: - e_words.append(words[sub_to_words[idx]]) - curr_word_idx = sub_to_words[idx] - if idx == e_span_indice[0]: - e_offset[0] = offsets[sub_to_words[idx]][0] - if idx == e_span_indice[1]: - e_offset[1] = offsets[sub_to_words[idx]][1] - return ' '.join(e_words), (e_offset[0], e_offset[1]) - - -def mapping_entity_id(entities_): - eid = 1 - enid_mapping = collections.OrderedDict() - en_preds_out_ = [] - - # create mapping for entity id first - for en_id, en_data in entities_.items(): - - if en_id.startswith('TR'): - continue - - elif en_id.startswith('T'): 
- enid_mapping[en_id] = 'T' + str(eid) - eid += 1 - en_preds_out_.append(en_data) - - # creat mapping for trigger id - for en_id, en_data in entities_.items(): - - if en_id.startswith('TR'): - enid_mapping[en_id] = 'T' + str(eid) - eid += 1 - en_preds_out_.append(en_data) - - return enid_mapping, en_preds_out_ + # r_id = 'R' + str(r_count) + # r_count += 1 + # relations[r_id] = {"id": r_id, "role": role, + # "left_arg": {"label": "Arg1", "id": arg2}, + # "right_arg": {"label": "Arg2", "id": arg1}} + for fid, ners_rels in map.items(): + write_annotation_file(ann_file=dir2wr + fid + '.ann', entities=ners_rels['ents'], relations=ners_rels['rels']) -def write_annotation_file(dir2wr, fid, entities=None, relations=None): - re_lines = [] - en_lines = [] - tr_lines = [] - # entity id mapping - enid_mapping, en_preds_out_ = mapping_entity_id(entities) +def write_annotation_file( + ann_file, entities=None, triggers=None, relations=None, events=None +): + lines = [] - if entities: - for entity in en_preds_out_: + def annotate_text_bound(entities): + for entity in entities.values(): entity_annotation = "{}\t{} {} {}\t{}".format( - enid_mapping[entity["id"]], + entity["id"], entity["type"], entity["start"], entity["end"], entity["ref"], ) + lines.append(entity_annotation) - re_lines.append(entity_annotation) - - if entity["id"].startswith('TR'): - tr_lines.append(entity_annotation) + if entities: + annotate_text_bound(entities) - elif entity["id"].startswith('T'): - en_lines.append(entity_annotation) + if triggers: + annotate_text_bound(triggers) if relations: for relation in relations.values(): @@ -260,17 +425,86 @@ def write_annotation_file(dir2wr, fid, entities=None, relations=None): relation["id"], relation["role"], relation["left_arg"]["label"], - enid_mapping[relation["left_arg"]["id"]], + relation["left_arg"]["id"], relation["right_arg"]["label"], - enid_mapping[relation["right_arg"]["id"]], + relation["right_arg"]["id"], ) - re_lines.append(relation_annotation) + lines.append(relation_annotation) - # write to file - re_file = ''.join([dir2wr, fid, '-RE.ann']) - en_file = ''.join([dir2wr, fid, '-EN.ann']) - tr_file = ''.join([dir2wr, fid, '-TR.ann']) - - write_lines(re_lines, re_file) - write_lines(en_lines, en_file) - write_lines(tr_lines, tr_file) + if events: + for event in events.values(): + event_annotation = "{}\t{}:{}".format( + event["id"], event["trigger_type"], event["trigger_id"] + ) + for arg in event["args"]: + event_annotation += " {}:{}".format(arg["role"], arg["id"]) + lines.append(event_annotation) + + write_lines(lines, ann_file) + + +def eval_performance(ref_dir, pred_dir, result_dir, pred_scores_file, params): + # run evaluation script + + command = ''.join( + ["python ", params['rel_eval_script_path'], " --ner-eval-corpus ", params['ner_eval_corpus'], " ", ref_dir, " ", + pred_dir, " > ", pred_scores_file]) + os.system(command) + + # if predict: run for all config + if params['predict'] == True: + # entiy scores only + ner_eval_corpus = ''.join([params['task_name'], '_en']) + pred_scores_file = ''.join([result_dir, 'rel-last/rel-scores-', ner_eval_corpus, '.txt']) + command = ''.join( + ["python ", params['rel_eval_script_path'], " --ner-eval-corpus ", ner_eval_corpus, " ", ref_dir, + " ", + pred_dir, " > ", pred_scores_file]) + os.system(command) + + # trigger scores only + ner_eval_corpus = ''.join([params['task_name'], '_tr']) + pred_scores_file = ''.join([result_dir, 'rel-last/rel-scores-', ner_eval_corpus, '.txt']) + command = ''.join( + ["python ", 
params['rel_eval_script_path'], " --ner-eval-corpus ", ner_eval_corpus, " ", ref_dir, + " ", + pred_dir, " > ", pred_scores_file]) + os.system(command) + + +def extract_fscore(path): + file = open(path, 'r') + lines = file.readlines() + report = defaultdict() + report['NER'] = defaultdict() + report['REL'] = defaultdict() + + ent_or_rel = '' + for line in lines: + if '*' in line and 'TRACK' in line: + ent_or_rel = 'NER' + elif '*' in line and 'RELATIONS' in line: + ent_or_rel = 'REL' + elif len(line.split()) > 0 and line.split()[0] == 'Overall': + tokens = line.split() + if len(tokens) > 8: + strt_f, strt_r, strt_p, soft_f, soft_r, soft_p \ + = tokens[-7], tokens[-8], tokens[-9], tokens[-4], tokens[-5], tokens[-6] + else: + strt_f, strt_r, strt_p, soft_f, soft_r, soft_p \ + = tokens[-4], tokens[-5], tokens[-6], tokens[-1], tokens[-2], tokens[-3] + if line.split()[1] == '(micro)': + mi_or_mc = 'micro' + elif line.split()[1] == '(macro)': + mi_or_mc = 'macro' + else: + mi_or_mc = '' + if mi_or_mc != '': + report[ent_or_rel][mi_or_mc] = {'st_f': float(strt_f.strip()) * 100, + 'st_r': float(strt_r.strip()) * 100, + 'st_p': float(strt_p.strip()) * 100, + 'so_f': float(soft_f.strip()) * 100, + 'so_r': float(soft_r.strip()) * 100, + 'so_p': float(soft_p.strip()) * 100} + + return report diff --git a/eval/evaluate.py b/eval/evaluate.py index 716006b..7033e98 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -1,13 +1,19 @@ +import time + import torch from tqdm import tqdm -from eval.evalRE import write_entity_relations -from eval.evalEV import write_events +from eval.evalEV import evaluate_ev +from eval.evalRE import estimate_perf, estimate_rel +from eval.evalNER import eval_nner +from scripts.pipeline_process import gen_ner_ann_files, gen_rel_ann_files from utils import utils +from utils.utils import _humanized_time -def predict(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params): +def eval(model, eval_dir, result_dir, eval_dataloader, eval_data, params, epoch=0): mapping_id_tag = params['mappings']['nn_mapping']['id_tag_mapping'] + rel_tp_tr, rel_fp_tr, rel_fn_tr = [], [], [] # store predicted entities ent_preds = [] @@ -17,15 +23,20 @@ def predict(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params fidss, wordss, offsetss, sub_to_wordss, span_indicess = [], [], [], [], [] - # entity and relation output - ent_anns = [] rel_anns = [] + ent_anns = [] # Evaluation phase model.eval() + # nner all_ner_preds, all_ner_golds, all_ner_terms = [], [], [] + total_rel_matched_indices = 0 + total_rel_matched_types = 0 + t_start = time.time() + + is_eval_rel = False is_eval_ev = False for step, batch in enumerate( @@ -34,8 +45,8 @@ def predict(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params eval_data_ids = batch tensors = utils.get_tensors(eval_data_ids, eval_data, params) - nn_tokens, nn_ids, nn_token_mask, nn_attention_mask, nn_span_indices, nn_span_labels, nn_span_labels_match_rel, nn_entity_masks, nn_trigger_masks, _, \ - etypes, _ = tensors + nn_tokens, nn_ids, nn_token_mask, nn_attention_mask, nn_span_indices, nn_span_labels, nn_span_labels_match_rel, nn_entity_masks, nn_trigger_masks, nn_gtruth, nn_l2r, _, \ + nn_truth_ev, nn_ev_idxs, ev_lbls, etypes, _ = tensors fids = [ eval_data["fids"][data_id] for data_id in eval_data_ids[0].tolist() @@ -61,11 +72,34 @@ def predict(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params ] with torch.no_grad(): - ner_out, rel_out, ev_out = model(tensors, params) + if not params['predict']: + 
ner_out, rel_out, ev_out, loss = model(tensors, epoch) + else: + ner_out, rel_out, ev_out, loss = model(tensors) ner_preds = ner_out['preds'] - ner_terms = ner_out['terms'] + if not params['predict']: # Debug only + # Case train REL only + if params['skip_ner'] and params['rel_epoch'] >= (params['epoch'] - 1) and params['use_gold_ner']: + ner_terms = ner_out['gold_terms'] + ner_preds = ner_out['golds'] + # Case train EV only + elif params['skip_ner'] and params['skip_rel'] and params['use_gold_ner'] \ + and params['use_gold_rel']: + ner_terms = ner_out['gold_terms'] + ner_preds = ner_out['golds'] + else: + ner_terms = ner_out['terms'] + else: + if params['gold_eval'] or params['pipelines']: + if params['pipelines'] and params['pipe_flag'] == 0: + ner_terms = ner_out['terms'] + else: + ner_terms = ner_out['gold_terms'] + ner_preds = ner_out['golds'] + else: + ner_terms = ner_out['terms'] all_ner_terms.append(ner_terms) @@ -102,35 +136,87 @@ def predict(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params ) all_ner_preds.append(pred_entities) - # entity prediction - ent_ann = {'span_indices': nn_span_indices, 'ner_preds': ner_out['preds'], 'words': words, - 'offsets': offsets, 'sub_to_words': sub_to_words, 'subwords': subwords, - 'ner_terms': ner_terms} - ent_anns.append(ent_ann) - fidss.append(fids) + if params['predict']: + if params['gold_eval'] or params['pipelines']: + if params['pipelines'] and params['pipe_flag'] == 0: + ent_ann = {'span_indices': nn_span_indices, 'ner_preds': ner_out['preds'], 'words': words, + 'offsets': offsets, 'sub_to_words': sub_to_words, 'subwords': subwords, + 'ner_terms': ner_terms} + else: + ent_ann = {'span_indices': nn_span_indices, 'ner_preds': ner_out['golds'], 'words': words, + 'offsets': offsets, 'sub_to_words': sub_to_words, 'subwords': subwords, + 'ner_terms': ner_terms} + else: + ent_ann = {'span_indices': nn_span_indices, 'ner_preds': ner_out['preds'], 'words': words, + 'offsets': offsets, 'sub_to_words': sub_to_words, 'subwords': subwords, + 'ner_terms': ner_terms} + else: + # Case only train REL + if params['skip_ner'] and params['rel_epoch'] >= (params['epoch'] - 1) and params['use_gold_ner']: + ent_ann = {'span_indices': nn_span_indices, 'ner_preds': ner_out['golds'], 'words': words, + 'offsets': offsets, 'sub_to_words': sub_to_words, 'subwords': subwords, + 'ner_terms': ner_terms} + # Case only train EV + elif params['skip_ner'] and params['skip_rel'] and params['use_gold_rel']: + ent_ann = {'span_indices': nn_span_indices, 'ner_preds': ner_out['golds'], 'words': words, + 'offsets': offsets, 'sub_to_words': sub_to_words, 'subwords': subwords, + 'ner_terms': ner_terms} + else: + ent_ann = {'span_indices': nn_span_indices, 'ner_preds': ner_out['preds'], 'words': words, + 'offsets': offsets, 'sub_to_words': sub_to_words, 'subwords': subwords, + 'ner_terms': ner_terms} + + ent_anns.append(ent_ann) wordss.append(words) offsetss.append(offsets) sub_to_wordss.append(sub_to_words) - # relation prediction if rel_out != None: - pairs_idx = rel_out['pairs_idx'] - rel_pred = rel_out['preds'] + rel_tp_tr.append(rel_out['true_pos'].tolist()) + rel_fp_tr.append(rel_out['false_pos'].tolist()) + rel_fn_tr.append(rel_out['false_neg'].tolist()) + total_rel_matched_indices += rel_out['no_matched_rel']['no_rel_matched_indices'] + total_rel_matched_types += rel_out['no_matched_rel']['no_rel_matched_types'] + + if params['predict']: + if params['gold_eval'] or params['pipelines']: + if params['pipelines'] and params['pipe_flag'] != 2: + pairs_idx = 
rel_out['pairs_idx'] + rel_pred = rel_out['preds'] + else: + pairs_idx = rel_out['l2r'] + rel_pred = rel_out['truth'] + else: + pairs_idx = rel_out['pairs_idx'] + rel_pred = rel_out['preds'] + else: + # Case only train REL + if params['skip_ner'] and params['rel_epoch'] >= (params['epoch'] - 1) \ + and params['use_gold_ner']: + pairs_idx = rel_out['l2r'] + rel_pred = rel_out['preds'] + # Case only train EV + elif params['skip_ner'] and params['skip_rel'] and params['use_gold_rel']: + pairs_idx = rel_out['l2r'] + rel_pred = rel_out['truth'] + else: + pairs_idx = rel_out['pairs_idx'] + rel_pred = rel_out['preds'] rel_ann = {'pairs_idx': pairs_idx, 'rel_preds': rel_pred} rel_anns.append(rel_ann) + is_eval_rel = True else: rel_anns.append({}) - # event prediction if ev_out != None: # add predicted entity ent_preds.append(ner_out["nner_preds"]) # add predicted events - ev_preds.append(ev_out) + ev_preds.append(ev_out['output']) span_indicess.append( [ @@ -148,24 +234,273 @@ def predict(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params # Clear GPU unused RAM: if params['gpu'] >= 0: torch.cuda.empty_cache() - # write entity and relation prediction - _ = write_entity_relations( - result_dir=result_dir, - fidss=fidss, - ent_anns=ent_anns, - rel_anns=rel_anns, - params=params - ) + if params['predict'] and params['pipelines']: + if params['pipe_flag'] == 0: + gen_ner_ann_files(fidss, ent_anns, params) + return + elif params['pipe_flag'] == 1: + gen_rel_ann_files(fidss, ent_anns, rel_anns, params) + return + + # Do estimations here + labels = params["mappings"]["nn_mapping"]["trigger_labels"] + if params["ner_predict_all"]: + labels = params["mappings"]["nn_mapping"]["full_labels"] + + ner_res, ner_score = eval_nner(all_ner_preds, all_ner_golds, labels) + scores = estimate_rel(ref_dir=eval_dir, + result_dir=result_dir, + fids=fidss, + ent_anns=ent_anns, + rel_anns=rel_anns, + params=params) + if is_eval_rel: + tr_scores = estimate_perf(rel_tp_tr, rel_fp_tr, rel_fn_tr, params) + else: + tr_scores = {'micro_p': 0, 'micro_r': 0, 'micro_f': 0} if is_eval_ev > 0: - write_events(fids=fidss, - all_ent_preds=ent_preds, - all_words=wordss, - all_offsets=offsetss, - all_span_terms=all_ner_terms, - all_span_indices=span_indicess, - all_sub_to_words=sub_to_wordss, - all_ev_preds=ev_preds, - g_entity_ids_=g_entity_ids_, - params=params, - result_dir=result_dir) + ev_scores = evaluate_ev(fids=fidss, + all_ent_preds=ent_preds, + all_words=wordss, + all_offsets=offsetss, + all_span_terms=all_ner_terms, + all_span_indices=span_indicess, + all_sub_to_words=sub_to_wordss, + all_ev_preds=ev_preds, + params=params, + gold_dir=eval_dir, + result_dir=result_dir) + else: + ev_scores = {} + + # Print estimation scores here + if not params['predict'] or (params['predict'] and not params['gold_eval']): + print() + print('-----OUR EVALUATIONS (NOT RECOMMEND)-----') + print() + print(ner_res) + print() + print( + "ENT: P/R/F1 = {:.02f}\t{:.02f}\t{:.02f} ".format(ner_score[-1][1], ner_score[-1][2], + ner_score[-1][3]), end="", + ) + print() + print( + "REL: P/R/F1 = {:.02f}\t{:.02f}\t{:.02f} ".format( + tr_scores["micro_p"] * 100, + tr_scores["micro_r"] * 100, + tr_scores["micro_f"] * 100, + ), + end="", + ) + print() + print('Total matched indice relations', total_rel_matched_indices) + print('Total matched type relations', total_rel_matched_types) + + print() + print('-----EVALUATING BY N2C2 SCRIPT (FOR ENT & REL)-----') + print() + print('STRICT_MATCHING:') + print_scores('NER', scores['NER'], 'st') + 
print() + print('SOFT_MATCHING:') + print_scores('NER', scores['NER'], 'so') + if is_eval_rel: + print() + print('STRICT_MATCHING:') + print_scores('REL', scores['REL'], 'st') + print() + print('SOFT_MATCHING:') + print_scores('REL', scores['REL'], 'so') + else: + if params['skip_rel']: + print('Not evaluate REL') + else: + print('No relation') + print() + print('-----EVALUATING BY SCRIPT (FOR EV)-----') + print() + if len(ev_scores) > 0: + sub_p, sub_r, sub_f = ev_scores['sub_scores'][0], ev_scores['sub_scores'][1], ev_scores['sub_scores'][2] + mod_p, mod_r, mod_f = ev_scores['mod_scores'][0], ev_scores['mod_scores'][1], ev_scores['mod_scores'][2] + tot_p, tot_r, tot_f = ev_scores['tot_scores'][0], ev_scores['tot_scores'][1], ev_scores['tot_scores'][2] + print('SUB : P/R/F1 = {:.02f}\t{:.02f}\t{:.02f} '.format(sub_p, sub_r, sub_f), end="") + print() + print('MOD : P/R/F1 = {:.02f}\t{:.02f}\t{:.02f} '.format(mod_p, mod_r, mod_f), end="") + print() + print('TOT : P/R/F1 = {:.02f}\t{:.02f}\t{:.02f} '.format(tot_p, tot_r, tot_f), end="") + print() + else: + print('No event/Not evaluate EV/error when evaluating by CG script') + print() + print() + print() + t_end = time.time() + print('Elapsed time: {}'.format(_humanized_time(t_end - t_start))) + print() + + # Do saving models + if not params['predict']: + # ! ========== KHOA WAS HERE ========== + ner_f1score = ner_score[-1][3] + + # ner_fscore = ner_f1score * 100 + # get the best score by n2c2 instead + ner_fscore = scores['NER']['micro']['st_f'] + + if is_eval_rel: + rel_fscore = scores['REL']['micro']['st_f'] + else: + rel_fscore = 0 + + if len(ev_scores) > 0: + ev_fscore = ev_scores['tot_scores'][2] + else: + ev_fscore = 0 + + if params['ner_epoch'] >= (params['epoch'] - 1): + best_score = ner_fscore + elif params['rel_epoch'] >= (params['epoch'] - 1): + best_score = rel_fscore + else: + best_score = ev_fscore + # Save models: + if params['save_ner']: + ner_model_path = params['ner_model_dir'] + utils.handle_checkpoints( + model=model.NER_layer, + checkpoint_dir=ner_model_path, + params={ + "filename": "ner_base", + "epoch": epoch, + "fscore": ner_fscore, + "ner_fscore": ner_fscore, + "rel_fscore": rel_fscore, + "ev_fscore": ev_fscore, + 'device': params['device'] + }, + filter_func=utils.save_best_fscore, + num_saved=1 + ) + + if params['save_rel']: + rel_model_path = params['rel_model_dir'] + utils.handle_checkpoints( + model=model.REL_layer, + checkpoint_dir=rel_model_path, + params={ + "filename": "rel_base", + "epoch": epoch, + "fscore": rel_fscore, + "ner_fscore": ner_fscore, + "rel_fscore": rel_fscore, + "ev_fscore": ev_fscore, + 'device': params['device'] + }, + filter_func=utils.save_best_fscore, + num_saved=1 + ) + if params['save_model_pipeline']: + ner_model_path = params['ner_model_dir'] + utils.handle_checkpoints( + model=model.NER_layer, + checkpoint_dir=ner_model_path, + params={ + "filename": "rel_base", + "epoch": epoch, + "fscore": rel_fscore, + "ner_fscore": ner_fscore, + "rel_fscore": rel_fscore, + "ev_fscore": ev_fscore, + 'device': params['device'] + }, + filter_func=utils.save_best_fscore, + num_saved=1 + ) + if params['save_ev']: + ev_model_path = params['ev_model_dir'] + utils.handle_checkpoints( + model=model.EV_layer, + checkpoint_dir=ev_model_path, + params={ + "filename": "ev_base", + "epoch": epoch, + "fscore": ev_fscore, + "ner_fscore": ner_fscore, + "rel_fscore": rel_fscore, + "ev_fscore": ev_fscore, + 'device': params['device'] + }, + filter_func=utils.save_best_fscore, + num_saved=1 + ) + if 
params['save_model_pipeline']:
+                ner_model_path = params['ner_model_dir']
+                rel_model_path = params['rel_model_dir']
+                utils.handle_checkpoints(
+                    model=model.NER_layer,
+                    checkpoint_dir=ner_model_path,
+                    params={
+                        "filename": "ev_base",
+                        "epoch": epoch,
+                        "fscore": ev_fscore,
+                        "ner_fscore": ner_fscore,
+                        "rel_fscore": rel_fscore,
+                        "ev_fscore": ev_fscore,
+                        'device': params['device']
+                    },
+                    filter_func=utils.save_best_fscore,
+                    num_saved=1
+                )
+                utils.handle_checkpoints(
+                    model=model.REL_layer,
+                    checkpoint_dir=rel_model_path,
+                    params={
+                        "filename": "ev_base",
+                        "epoch": epoch,
+                        "fscore": ev_fscore,
+                        "ner_fscore": ner_fscore,
+                        "rel_fscore": rel_fscore,
+                        "ev_fscore": ev_fscore,
+                        'device': params['device']
+                    },
+                    filter_func=utils.save_best_fscore,
+                    num_saved=1
+                )
+
+        if params['save_all_models']:
+            deepee_model_path = params['joint_model_dir']
+            utils.handle_checkpoints(
+                model=model,
+                checkpoint_dir=deepee_model_path,
+                params={
+                    "filename": "deepee_base",
+                    "epoch": epoch,
+                    "fscore": best_score,
+                    "ner_fscore": ner_fscore,
+                    "rel_fscore": rel_fscore,
+                    "ev_fscore": ev_fscore,
+                    'device': params['device']
+                },
+                filter_func=utils.save_best_fscore,
+                num_saved=1
+            )
+            print("Saved all models")
+    # ! ===================================
+
+    if len(ev_scores) > 0:
+        return ner_score, is_eval_rel, tr_scores, scores, {'p': ev_scores['tot_scores'][0],
+                                                           'r': ev_scores['tot_scores'][1],
+                                                           'f': ev_scores['tot_scores'][2]}
+    else:
+        return ner_score, is_eval_rel, tr_scores, scores, ev_scores
+
+
+def print_scores(k, v, stoso):
+    print(
+        k + "(MICRO): P/R/F1 = {:.02f}\t{:.02f}\t{:.02f} , (MACRO): P/R/F1 = {:.02f}\t{:.02f}\t{:.02f} ".format(
+            v['micro'][stoso + '_p'], v['micro'][stoso + '_r'], v['micro'][stoso + '_f'],
+            v['macro'][stoso + '_p'], v['macro'][stoso + '_r'], v['macro'][stoso + '_f']), end="",
+    )
+    print()
diff --git a/eval/scripts/n2c2.py b/eval/scripts/n2c2.py
new file mode 100644
index 0000000..3865281
--- /dev/null
+++ b/eval/scripts/n2c2.py
@@ -0,0 +1,1511 @@
+#!/usr/local/bin/python
+
+"""Inter-annotator agreement calculator."""
+
+"""
+To run this file, please use:
+
+python <gold standard folder> <system output folder>
+
+e.g.: python gold_annotations system_annotations
+
+Please note that you must use Python 3 to get the correct results with this script.
+
+"""
+
+import argparse
+import glob
+import os
+import logging
+from collections import defaultdict
+from xml.etree import cElementTree
+
+logger = logging.getLogger(__name__)
+
+
+class ClinicalCriteria(object):
+    """Criteria in the Track 1 documents."""
+
+    def __init__(self, tid, value):
+        """Init."""
+        self.tid = tid.strip().upper()
+        self.ttype = self.tid
+        self.value = value.lower().strip()
+
+    def equals(self, other, mode='strict'):
+        """Return whether the current criteria is equal to the one provided."""
+        if other.tid == self.tid and other.value == self.value:
+            return True
+        return False
+
+
+class ClinicalConcept(object):
+    """Named Entity Tag class."""
+
+    def __init__(self, tid, start, end, ttype, text=''):
+        """Init."""
+        self.tid = str(tid).strip()
+        self.start = int(start)
+        self.end = int(end)
+        self.text = str(text).strip()
+        self.ttype = str(ttype).strip()
+
+    def span_matches(self, other, mode='strict'):
+        """Return whether the current tag overlaps with the one provided."""
+        assert mode in ('strict', 'lenient')
+        if mode == 'strict':
+            if self.start == other.start and self.end == other.end:
+                return True
+        else:  # lenient
+            if (self.end > other.start and self.start < other.end) or \
+                    (self.start < other.end and other.start < self.end):
+                return True
+        return False
+
+    def equals(self, other, mode='strict'):
+        """Return whether the current tag is equal to the one provided."""
+        assert mode in ('strict', 'lenient')
+        return other.ttype == self.ttype and self.span_matches(other, mode)
+
+    def __str__(self):
+        """String representation."""
+        return '{}\t{}\t({}:{})'.format(self.ttype, self.text, self.start, self.end)
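+
+# Illustrative sketch only (the ids, offsets, and texts below are invented,
+# not taken from any corpus): strict matching requires identical span
+# offsets, while lenient matching only requires the two spans to overlap.
+#
+#   gold = ClinicalConcept('T1', 10, 24, 'Cancer', 'breast carcinoma')
+#   pred = ClinicalConcept('T2', 17, 24, 'Cancer', 'carcinoma')
+#   gold.span_matches(pred, mode='strict')   # False: offsets differ
+#   gold.span_matches(pred, mode='lenient')  # True: the spans overlap
+#   gold.equals(pred, mode='lenient')        # True: same ttype + overlapping span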
+
+
+class Relation(object):
+    """Relation class."""
+
+    def __init__(self, rid, arg1, arg2, rtype):
+        """Init."""
+        assert isinstance(arg1, ClinicalConcept)
+        assert isinstance(arg2, ClinicalConcept)
+        self.rid = str(rid).strip()
+        self.arg1 = arg1
+        self.arg2 = arg2
+        self.rtype = str(rtype).strip()
+
+    def equals(self, other, mode='strict'):
+        """Return whether the current relation is equal to the one provided."""
+        assert mode in ('strict', 'lenient')
+        if self.arg1.equals(other.arg1, mode) and \
+                self.arg2.equals(other.arg2, mode) and \
+                self.rtype == other.rtype:
+            return True
+        return False
+
+    def __str__(self):
+        """String representation."""
+        return '{} ({}->{})'.format(self.rtype, self.arg1.ttype,
+                                    self.arg2.ttype)
+
+
+class RecordTrack1(object):
+    """Record for Track 1 class."""
+
+    def __init__(self, file_path):
+        self.path = os.path.abspath(file_path)
+        self.basename = os.path.basename(self.path)
+        self.annotations = self._get_annotations()
+        self.text = None
+
+    @property
+    def tags(self):
+        return self.annotations['tags']
+
+    def _get_annotations(self):
+        """Return a dictionary with all the annotations in the .ann file."""
+        annotations = defaultdict(dict)
+        annotation_file = cElementTree.parse(self.path)
+        for tag in annotation_file.findall('.//TAGS/*'):
+            criterion = ClinicalCriteria(tag.tag.upper(), tag.attrib['met'])
+            annotations['tags'][tag.tag.upper()] = criterion
+            if tag.attrib['met'] not in ('met', 'not met'):
+                # a bare assert on a non-empty string never fails; raise instead
+                raise AssertionError('{}: Unexpected value ("{}") for the {} tag!'.format(
+                    self.path, criterion.value, criterion.ttype))
+        return annotations
+
+
+class RecordTrack2(object):
+    """Record for Track 2 class."""
+
+    def __init__(self, file_path):
+        """Initialize."""
+        self.path = os.path.abspath(file_path)
+        self.basename = os.path.basename(self.path)
+        self.annotations = self._get_annotations()
+        # self.text = self._get_text()
+
+    @property
+    def tags(self):
+        return self.annotations['tags']
+
+    @property
+    def relations(self):
+        return self.annotations['relations']
+
+    def _get_annotations(self):
+        """Return a dictionary with all the annotations in the .ann file."""
+        annotations = defaultdict(dict)
+        with open(self.path) as annotation_file:
+            lines = annotation_file.readlines()
+            for line_num, line in enumerate(lines):
+                if line.strip().startswith('T'):
+                    try:
+                        tag_id, tag_m, tag_text = line.strip().split('\t')
+                    except ValueError:
+                        print(self.path, line)
+                        continue  # skip malformed tag lines instead of crashing below
+                    if len(tag_m.split(' ')) == 3:
+                        tag_type, tag_start, tag_end = tag_m.split(' ')
+                    elif len(tag_m.split(' ')) == 4:
+                        tag_type, tag_start, _, tag_end = tag_m.split(' ')
+                    elif len(tag_m.split(' ')) == 5:
+                        tag_type, tag_start, _, _, tag_end = tag_m.split(' ')
+                    else:
+                        print(self.path)
+                        print(line)
+                        continue  # skip unexpected span layouts instead of crashing below
+                    tag_start, tag_end = int(tag_start), int(tag_end)
+                    annotations['tags'][tag_id] = ClinicalConcept(tag_id,
+                                                                  tag_start,
+                                                                  tag_end,
+                                                                  tag_type,
+                                                                  tag_text)
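+            # For reference, brat-style standoff lines look roughly like the
+            # following (made-up example; tabs separate the fields of a tag line):
+            #   T1<TAB>Cancer 10 24<TAB>breast carcinoma
+            #   R1<TAB>Theme Arg1:T1 Arg2:T2
+            # The second pass below resolves R lines against the tags parsed
+            # in the first pass above.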
+            for line_num, line in enumerate(lines):
+                if line.strip().startswith('R'):
+                    rel_id, rel_m = line.strip().split('\t')
+                    rel_type, rel_arg1, rel_arg2 = rel_m.split(' ')
+                    rel_arg1 = rel_arg1.split(':')[1]
+                    rel_arg2 = rel_arg2.split(':')[1]
+                    try:
+                        arg1 = annotations['tags'][rel_arg1]
+                        arg2 = annotations['tags'][rel_arg2]
+                        annotations['relations'][rel_id] = Relation(rel_id, arg1,
+                                                                    arg2, rel_type)
+                    except KeyError as err:
+                        logger.info(err)
+        return annotations
+
+    def _get_text(self):
+        """Return the text in the corresponding txt file."""
+        path = self.path.replace('.ann', '.txt')
+        with open(path) as text_file:
+            text = text_file.read()
+        return text
+
+    def search_by_id(self, key):
+        """Search by id among both tags and relations."""
+        try:
+            return self.annotations['tags'][key]
+        except KeyError:
+            try:
+                return self.annotations['relations'][key]
+            except KeyError:
+                return None
+
+
+class Measures(object):
+    """Abstract methods and var to evaluate."""
+
+    def __init__(self, tp=0, tn=0, fp=0, fn=0):
+        """Initialize."""
+        assert type(tp) == int
+        assert type(tn) == int
+        assert type(fp) == int
+        assert type(fn) == int
+        self.tp = tp
+        self.tn = tn
+        self.fp = fp
+        self.fn = fn
+
+    def precision(self):
+        """Compute Precision score."""
+        try:
+            return self.tp / (self.tp + self.fp)
+        except ZeroDivisionError:
+            return 0.0
+
+    def recall(self):
+        """Compute Recall score."""
+        try:
+            return self.tp / (self.tp + self.fn)
+        except ZeroDivisionError:
+            return 0.0
+
+    def f_score(self, beta=1):
+        """Compute F-beta measure score."""
+        assert beta > 0.
+        try:
+            num = (1 + beta ** 2) * (self.precision() * self.recall())
+            den = beta ** 2 * (self.precision() + self.recall())
+            return num / den
+        except ZeroDivisionError:
+            return 0.0
+
+    def f1(self):
+        """Compute the F1-score (beta=1)."""
+        return self.f_score(beta=1)
+
+    def specificity(self):
+        """Compute Specificity score."""
+        try:
+            return self.tn / (self.fp + self.tn)
+        except ZeroDivisionError:
+            return 0.0
+
+    def sensitivity(self):
+        """Compute Sensitivity score."""
+        return self.recall()
+
+    def auc(self):
+        """Compute AUC score."""
+        return (self.sensitivity() + self.specificity()) / 2
+
+
+class SingleEvaluator(object):
+    """Evaluate two single files."""
+
+    def __init__(self, doc1, doc2, track, mode='strict', key=None, verbose=False, exclude_tags=()):
+        """Initialize."""
+        assert isinstance(doc1, RecordTrack2) or isinstance(doc1, RecordTrack1)
+        assert isinstance(doc2, RecordTrack2) or isinstance(doc2, RecordTrack1)
+        assert mode in ('strict', 'lenient')
+        assert doc1.basename == doc2.basename
+        self.scores = {'tags': {'tp': 0, 'fp': 0, 'fn': 0, 'tn': 0},
+                       'relations': {'tp': 0, 'fp': 0, 'fn': 0, 'tn': 0}}
+        self.doc1 = doc1
+        self.doc2 = doc2
+        if key:
+            gol = [t for t in doc1.tags.values() if t.ttype == key and t.ttype not in exclude_tags]
+            sys = [t for t in doc2.tags.values() if t.ttype == key and t.ttype not in exclude_tags]
+            sys_check = [t for t in doc2.tags.values() if t.ttype == key and t.ttype not in exclude_tags]
+        else:
+            gol = [t for t in doc1.tags.values() if t.ttype not in exclude_tags]
+            sys = [t for t in doc2.tags.values() if t.ttype not in exclude_tags]
+            sys_check = [t for t in doc2.tags.values() if t.ttype not in exclude_tags]
+
+        # pare down matches -- if multiple system tags overlap with only one
+        # gold standard tag, only keep one sys tag
+        gol_matched = []
+        for s in sys:
+            for g in gol:
+                if g.equals(s, mode):
+                    if g not in gol_matched:
+                        gol_matched.append(g)
+                    else:
+                        if s in sys_check:
+                            sys_check.remove(s)
+
+        sys = sys_check
+        # now evaluate
+        self.scores['tags']['tp'] = len({s.tid for s in sys for g in gol if g.equals(s, mode)})
+        self.scores['tags']['fp'] = len({s.tid for s in sys}) - self.scores['tags']['tp']
+        self.scores['tags']['fn'] = 
len({g.tid for g in gol}) - self.scores['tags']['tp'] + self.scores['tags']['tn'] = 0 + + if verbose and track == 2: + tps = {s for s in sys for g in gol if g.equals(s, mode)} + fps = set(sys) - tps + fns = set() + for g in gol: + if not len([s for s in sys if s.equals(g, mode)]): + fns.add(g) + for e in fps: + print('FP: ' + str(e)) + for e in fns: + print('FN:' + str(e)) + if track == 2: + if key: + gol = [r for r in doc1.relations.values() if r.rtype == key] + sys = [r for r in doc2.relations.values() if r.rtype == key] + sys_check = [r for r in doc2.relations.values() if r.rtype == key] + else: + gol = [r for r in doc1.relations.values()] + sys = [r for r in doc2.relations.values()] + sys_check = [r for r in doc2.relations.values()] + + # pare down matches -- if multiple system tags overlap with only one + # gold standard tag, only keep one sys tag + gol_matched = [] + for s in sys: + for g in gol: + if (g.equals(s, mode)): + if g not in gol_matched: + gol_matched.append(g) + else: + if s in sys_check: + sys_check.remove(s) + sys = sys_check + # now evaluate + self.scores['relations']['tp'] = len({s.rid for s in sys for g in gol if g.equals(s, mode)}) + self.scores['relations']['fp'] = len({s.rid for s in sys}) - self.scores['relations']['tp'] + self.scores['relations']['fn'] = len({g.rid for g in gol}) - self.scores['relations']['tp'] + self.scores['relations']['tn'] = 0 + if verbose: + tps = {s for s in sys for g in gol if g.equals(s, mode)} + fps = set(sys) - tps + fns = set() + for g in gol: + if not len([s for s in sys if s.equals(g, mode)]): + fns.add(g) + for e in fps: + print('FP: ' + str(e)) + for e in fns: + print('FN:' + str(e)) + + +class MultipleEvaluator(object): + """Evaluate two sets of files.""" + + def __init__(self, corpora, tag_type=None, mode='strict', + verbose=False): + """Initialize.""" + assert isinstance(corpora, Corpora) + assert mode in ('strict', 'lenient') + self.scores = None + if corpora.track == 1: + self.track1(corpora) + else: + self.track2(corpora, tag_type, mode, verbose) + + def track1(self, corpora): + """Compute measures for Track 1.""" + self.tags = ('ABDOMINAL', 'ADVANCED-CAD', 'ALCOHOL-ABUSE', + 'ASP-FOR-MI', 'CREATININE', 'DIETSUPP-2MOS', + 'DRUG-ABUSE', 'ENGLISH', 'HBA1C', 'KETO-1YR', + 'MAJOR-DIABETES', 'MAKES-DECISIONS', 'MI-6MOS') + self.scores = defaultdict(dict) + metrics = ('p', 'r', 'f1', 'specificity', 'auc') + values = ('met', 'not met') + self.values = {'met': {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}, + 'not met': {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}} + + def evaluation(corpora, value, scores): + predictions = defaultdict(list) + for g, s in corpora.docs: + for tag in self.tags: + predictions[tag].append( + (g.tags[tag].value == value, s.tags[tag].value == value)) + for tag in self.tags: + # accumulate for micro overall measure + self.values[value]['tp'] += predictions[tag].count((True, True)) + self.values[value]['fp'] += predictions[tag].count((False, True)) + self.values[value]['tn'] += predictions[tag].count((False, False)) + self.values[value]['fn'] += predictions[tag].count((True, False)) + + # compute per-tag measures + measures = Measures(tp=predictions[tag].count((True, True)), + fp=predictions[tag].count((False, True)), + tn=predictions[tag].count((False, False)), + fn=predictions[tag].count((True, False))) + scores[(tag, value, 'p')] = measures.precision() + scores[(tag, value, 'r')] = measures.recall() + scores[(tag, value, 'f1')] = measures.f1() + scores[(tag, value, 'specificity')] = measures.specificity() + scores[(tag, 
value, 'auc')] = measures.auc() + return scores + + self.scores = evaluation(corpora, 'met', self.scores) + self.scores = evaluation(corpora, 'not met', self.scores) + + for measure in metrics: + for value in values: + self.scores[('macro', value, measure)] = sum( + [self.scores[(t, value, measure)] for t in self.tags]) / len(self.tags) + + def track2(self, corpora, tag_type=None, mode='strict', verbose=False): + """Compute measures for Track 2.""" + self.scores = {'tags': {'tp': 0, + 'fp': 0, + 'fn': 0, + 'tn': 0, + 'micro': {'precision': 0, + 'recall': 0, + 'f1': 0}, + 'macro': {'precision': 0, + 'recall': 0, + 'f1': 0}}, + 'relations': {'tp': 0, + 'fp': 0, + 'fn': 0, + 'tn': 0, + 'micro': {'precision': 0, + 'recall': 0, + 'f1': 0}, + 'macro': {'precision': 0, + 'recall': 0, + 'f1': 0}}} + + # -----------------------------------------CG-------------------------------------------- + if corpora.corpus_type == 'cg': + self.tags = ( + 'Gene_expression', 'Mutation', 'Regulation', 'Development', 'Negative_regulation', 'Cell_proliferation', + 'Transcription', 'Glycosylation', 'Positive_regulation', 'Binding', 'Localization', 'Planned_process', + 'Metastasis', 'Death', 'Blood_vessel_development', 'Breakdown', 'Growth', 'Cell_transformation', + 'Carcinogenesis', 'Cell_differentiation', 'Cell_death', 'Cell_division', 'Infection', 'Pathway', + 'Dephosphorylation', 'Synthesis', 'Catabolism', 'Protein_processing', 'Remodeling', 'Metabolism', + 'Dissociation', 'Phosphorylation', 'Glycolysis', 'Translation', 'DNA_methylation', 'Reproduction', + 'Acetylation', 'Ubiquitination', 'Amino_acid_catabolism', 'DNA_demethylation', 'Gene_or_gene_product', + 'Cancer', 'Cell', 'Organism', 'DNA_domain_or_region', 'Simple_chemical', 'Multi-tissue_structure', + 'Organ', 'Organism_subdivision', 'Tissue', 'Immaterial_anatomical_entity', 'Organism_substance', + 'Protein_domain_or_region', 'Cellular_component', 'Pathological_formation', 'Amino_acid', + 'Anatomical_system', 'Developing_anatomical_structure') + self.exclude_tags = () + self.relations = ( + 'CSite', 'FromLoc', 'Site', 'ToLoc', 'Participant', 'AtLoc', 'Instrument', 'Cause', 'Theme') + elif corpora.corpus_type == 'cg_tr': + self.tags = ( + 'Gene_expression', 'Mutation', 'Regulation', 'Development', 'Negative_regulation', 'Cell_proliferation', + 'Transcription', 'Glycosylation', 'Positive_regulation', 'Binding', 'Localization', 'Planned_process', + 'Metastasis', 'Death', 'Blood_vessel_development', 'Breakdown', 'Growth', 'Cell_transformation', + 'Carcinogenesis', 'Cell_differentiation', 'Cell_death', 'Cell_division', 'Infection', 'Pathway', + 'Dephosphorylation', 'Synthesis', 'Catabolism', 'Protein_processing', 'Remodeling', 'Metabolism', + 'Dissociation', 'Phosphorylation', 'Glycolysis', 'Translation', 'DNA_methylation', 'Reproduction', + 'Acetylation', 'Ubiquitination', 'Amino_acid_catabolism', 'DNA_demethylation', 'Gene_or_gene_product', + 'Cancer', 'Cell', 'Organism', 'DNA_domain_or_region', 'Simple_chemical', 'Multi-tissue_structure', + 'Organ', 'Organism_subdivision', 'Tissue', 'Immaterial_anatomical_entity', 'Organism_substance', + 'Protein_domain_or_region', 'Cellular_component', 'Pathological_formation', 'Amino_acid', + 'Anatomical_system', 'Developing_anatomical_structure') + self.exclude_tags = ( + 'Immaterial_anatomical_entity', 'Cancer', 'Multi-tissue_structure', 'Anatomical_system', + 'Pathological_formation', 'Tissue', 'Gene_or_gene_product', 'Cell', 'Protein_domain_or_region', + 'Developing_anatomical_structure', 'Organism', 'Organ', 
'Simple_chemical', 'Organism_subdivision', + 'Amino_acid', 'Organism_substance', 'DNA_domain_or_region', 'Cellular_component' + ) + self.relations = ( + 'CSite', 'FromLoc', 'Site', 'ToLoc', 'Participant', 'AtLoc', 'Instrument', 'Cause', 'Theme' + ) + elif corpora.corpus_type == 'cg_en': + self.tags = ( + 'Gene_expression', 'Mutation', 'Regulation', 'Development', 'Negative_regulation', 'Cell_proliferation', + 'Transcription', 'Glycosylation', 'Positive_regulation', 'Binding', 'Localization', 'Planned_process', + 'Metastasis', 'Death', 'Blood_vessel_development', 'Breakdown', 'Growth', 'Cell_transformation', + 'Carcinogenesis', 'Cell_differentiation', 'Cell_death', 'Cell_division', 'Infection', 'Pathway', + 'Dephosphorylation', 'Synthesis', 'Catabolism', 'Protein_processing', 'Remodeling', 'Metabolism', + 'Dissociation', 'Phosphorylation', 'Glycolysis', 'Translation', 'DNA_methylation', 'Reproduction', + 'Acetylation', 'Ubiquitination', 'Amino_acid_catabolism', 'DNA_demethylation', 'Gene_or_gene_product', + 'Cancer', 'Cell', 'Organism', 'DNA_domain_or_region', 'Simple_chemical', 'Multi-tissue_structure', + 'Organ', 'Organism_subdivision', 'Tissue', 'Immaterial_anatomical_entity', 'Organism_substance', + 'Protein_domain_or_region', 'Cellular_component', 'Pathological_formation', 'Amino_acid', + 'Anatomical_system', 'Developing_anatomical_structure') + self.exclude_tags = ( + 'Positive_regulation', 'Negative_regulation', 'Regulation', 'Planned_process', 'Gene_expression', + 'Localization', 'Blood_vessel_development', 'Metastasis', 'Development', 'Cell_proliferation', + 'Cell_death', 'Binding', 'Pathway', 'Mutation', 'Cell_transformation', 'Carcinogenesis', + 'Growth', 'Death', 'Transcription', 'Breakdown', 'Cell_differentiation', 'Phosphorylation', + 'Metabolism', 'Glycolysis', 'Synthesis', 'Remodeling', 'DNA_methylation', 'Catabolism', + 'Infection', 'Protein_processing', 'Translation', 'Glycosylation', 'Dephosphorylation', + 'Acetylation', 'Dissociation', 'Cell_division', 'Amino_acid_catabolism', 'Reproduction', + 'Ubiquitination', 'DNA_demethylation') + self.relations = ( + 'CSite', 'FromLoc', 'Site', 'ToLoc', 'Participant', 'AtLoc', 'Instrument', 'Cause', 'Theme') + + # -----------------------------------------ACE-------------------------------------------- + elif corpora.corpus_type == 'ace': + self.tags = ( + 'Die', 'Injure', 'Attack', 'Transport', 'Start-Position', 'Arrest-Jail', 'Meet', 'Transfer-Money', + 'Sue', 'Charge-Indict', 'Sentence', 'Convict', 'End-Position', 'Transfer-Ownership', 'Demonstrate', + 'Execute', 'Appeal', 'Phone-Write', 'Elect', 'Trial-Hearing', 'Release-Parole', 'Acquit', 'Fine', + 'Start-Org', 'End-Org', 'Marry', 'Declare-Bankruptcy', 'Be-Born', 'Divorce', 'Extradite', 'Pardon', + 'Nominate', 'Merge-Org', 'LOC', 'FAC', 'PER', 'ORG', 'GPE', 'Time', 'WEA', 'VEH', 'Money', 'Crime', + 'Percent', 'Job-Title') + self.exclude_tags = () + self.relations = ( + 'Price', 'Time-At-End', 'Time-At-Beginning', 'Time-Ending', 'Time-Before', 'Time-After', 'Prosecutor', + 'Beneficiary', 'Seller', 'Time-Starting', 'Time-Holds', 'Plaintiff', 'Sentence', 'Vehicle', 'Money', + 'Buyer', 'Adjudicator', 'Org', 'Giver', 'Position', 'Recipient', 'Origin', 'Crime', 'Instrument', + 'Defendant', 'Agent', 'Target', 'Destination', 'Attacker', 'Victim', 'Artifact', 'Person', + 'Time-Within', 'Entity', 'Place') + + elif corpora.corpus_type == 'ace_tr': + self.tags = ( + 'Die', 'Injure', 'Attack', 'Transport', 'Start-Position', 'Arrest-Jail', 'Meet', 'Transfer-Money', + 'Sue', 'Charge-Indict', 
'Sentence', 'Convict', 'End-Position', 'Transfer-Ownership', 'Demonstrate', + 'Execute', 'Appeal', 'Phone-Write', 'Elect', 'Trial-Hearing', 'Release-Parole', 'Acquit', 'Fine', + 'Start-Org', 'End-Org', 'Marry', 'Declare-Bankruptcy', 'Be-Born', 'Divorce', 'Extradite', 'Pardon', + 'Nominate', 'Merge-Org', 'LOC', 'FAC', 'PER', 'ORG', 'GPE', 'Time', 'WEA', 'VEH', 'Money', 'Crime', + 'Percent', 'Job-Title') + self.exclude_tags = ( + 'ORG', 'VEH', 'Time', 'GPE', 'FAC', 'Money', 'LOC', 'PER', 'WEA', 'Job-Title', 'Percent', 'Crime' + ) + self.relations = ( + 'Price', 'Time-At-End', 'Time-At-Beginning', 'Time-Ending', 'Time-Before', 'Time-After', 'Prosecutor', + 'Beneficiary', 'Seller', 'Time-Starting', 'Time-Holds', 'Plaintiff', 'Sentence', 'Vehicle', 'Money', + 'Buyer', 'Adjudicator', 'Org', 'Giver', 'Position', 'Recipient', 'Origin', 'Crime', 'Instrument', + 'Defendant', 'Agent', 'Target', 'Destination', 'Attacker', 'Victim', 'Artifact', 'Person', + 'Time-Within', 'Entity', 'Place') + + # -----------------------------------------GE13-------------------------------------------- + elif corpora.corpus_type == 'ge13': + self.tags = ( + "Anaphora", + "Entity", + "Protein", + "Acetylation", + "Binding", + "Deacetylation", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Protein_catabolism", + "Protein_modification", + "Regulation", + "Transcription", + "Ubiquitination", + ) + self.exclude_tags = ( + ) + self.relations = ( + "CSite", + "Cause", + "Site", + "Theme", + "ToLoc" + ) + elif corpora.corpus_type == 'ge13_tr': + self.tags = ( + "Anaphora", + "Entity", + "Protein", + "Acetylation", + "Binding", + "Deacetylation", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Protein_catabolism", + "Protein_modification", + "Regulation", + "Transcription", + "Ubiquitination", + ) + self.exclude_tags = ( + "Anaphora", + "Entity", + "Protein", + ) + self.relations = ( + "CSite", + "Cause", + "Site", + "Theme", + "ToLoc", + ) + elif corpora.corpus_type == 'ge13_en': + self.tags = ( + "Anaphora", + "Entity", + "Protein", + "Acetylation", + "Binding", + "Deacetylation", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Protein_catabolism", + "Protein_modification", + "Regulation", + "Transcription", + "Ubiquitination", + ) + self.exclude_tags = ( + "Acetylation", + "Binding", + "Deacetylation", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Protein_catabolism", + "Protein_modification", + "Regulation", + "Transcription", + "Ubiquitination", + ) + self.relations = ( + "CSite", + "Cause", + "Site", + "Theme", + "ToLoc", + ) + # -----------------------------------------GE11-------------------------------------------- + elif corpora.corpus_type == 'ge11': + self.tags = ( + "Entity", + "Protein", + "Binding", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Protein_catabolism", + "Regulation", + "Transcription", + ) + self.exclude_tags = ( + ) + self.relations = ( + "AtLoc", + "CSite", + "Cause", + "Site", + "Theme", + "ToLoc", + ) + elif corpora.corpus_type == 'ge11_tr': + self.tags = ( + "Entity", + "Protein", + "Binding", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Protein_catabolism", + "Regulation", + "Transcription", + ) + 
self.exclude_tags = ( + "Entity", + "Protein" + ) + self.relations = ( + "AtLoc", + "CSite", + "Cause", + "Site", + "Theme", + "ToLoc", + ) + + elif corpora.corpus_type == 'ge11_en': + self.tags = ( + "Entity", + "Protein", + "Binding", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Protein_catabolism", + "Regulation", + "Transcription", + ) + self.exclude_tags = ("Binding", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Protein_catabolism", + "Regulation", + "Transcription",) + self.relations = ( + "AtLoc", + "CSite", + "Cause", + "Site", + "Theme", + "ToLoc", + ) + # -----------------------------------------ID-------------------------------------------- + elif corpora.corpus_type == 'id': + self.tags = ( + "Chemical", + "Entity", + "Organism", + "Protein", + "Regulon-operon", + "Two-component-system", + "Binding", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Process", + "Protein_catabolism", + "Regulation", + "Transcription", + ) + self.exclude_tags = ( + ) + self.relations = ( + "AtLoc", + "CSite", + "Cause", + "Participant", + "Site", + "Theme", + "ToLoc" + ) + elif corpora.corpus_type == 'id_tr': + self.tags = ( + "Chemical", + "Entity", + "Organism", + "Protein", + "Regulon-operon", + "Two-component-system", + "Binding", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Process", + "Protein_catabolism", + "Regulation", + "Transcription", + ) + self.exclude_tags = ( + "Chemical", + "Entity", + "Organism", + "Protein", + "Regulon-operon", + "Two-component-system", + ) + self.relations = ( + "AtLoc", + "CSite", + "Cause", + "Participant", + "Site", + "Theme", + "ToLoc", + ) + elif corpora.corpus_type == 'id_en': + self.tags = ( + "Binding", + "Gene_expression", + "Localization", + "Negative_regulation", + "Phosphorylation", + "Positive_regulation", + "Process", + "Protein_catabolism", + "Regulation", + "Transcription", + ) + self.exclude_tags = () + self.relations = ( + "AtLoc", + "CSite", + "Cause", + "Participant", + "Site", + "Theme", + "ToLoc", + ) + + # -----------------------------------------MLEE-------------------------------------------- + elif corpora.corpus_type == 'mlee': + self.tags = ( + 'Positive_regulation', 'Blood_vessel_development', 'Negative_regulation', 'Regulation', + 'Planned_process', + 'Localization', 'Development', 'Gene_expression', 'Growth', 'Binding', 'Cell_proliferation', 'Pathway', + 'Death', 'Breakdown', 'Remodeling', 'Catabolism', 'Phosphorylation', 'Transcription', 'Synthesis', + 'DNA_methylation', 'Metabolism', 'Protein_processing', 'Acetylation', 'Translation', + 'Dephosphorylation', + 'Ubiquitination', 'Gene_or_gene_product', 'Cell', 'Drug_or_compound', 'Pathological_formation', + 'Organism', 'Multi-tissue_structure', + 'Tissue', 'Organ', 'Cellular_component', 'Organism_substance', 'DNA_domain_or_region', + 'Organism_subdivision', + 'Protein_domain_or_region', 'Anatomical_system', 'Immaterial_anatomical_entity', + 'Developing_anatomical_structure' + ) + self.exclude_tags = () + self.relations = ( + 'Theme', 'Cause', 'Instrument', 'AtLoc', 'Participant', 'Site', 'ToLoc', 'CSite', 'FromLoc') + elif corpora.corpus_type == 'mlee_tr': + self.tags = ( + 'Positive_regulation', 'Blood_vessel_development', 'Negative_regulation', 'Regulation', + 'Planned_process', + 'Localization', 'Development', 
'Gene_expression', 'Growth', 'Binding', 'Cell_proliferation', 'Pathway', + 'Death', 'Breakdown', 'Remodeling', 'Catabolism', 'Phosphorylation', 'Transcription', 'Synthesis', + 'DNA_methylation', 'Metabolism', 'Protein_processing', 'Acetylation', 'Translation', + 'Dephosphorylation', + 'Ubiquitination', 'Gene_or_gene_product', 'Cell', 'Drug_or_compound', 'Pathological_formation', + 'Organism', 'Multi-tissue_structure', + 'Tissue', 'Organ', 'Cellular_component', 'Organism_substance', 'DNA_domain_or_region', + 'Organism_subdivision', + 'Protein_domain_or_region', 'Anatomical_system', 'Immaterial_anatomical_entity', + 'Developing_anatomical_structure' + ) + self.exclude_tags = ( + 'Gene_or_gene_product', 'Cell', 'Drug_or_compound', 'Pathological_formation', 'Organism', + 'Multi-tissue_structure', + 'Tissue', 'Organ', 'Cellular_component', 'Organism_substance', 'DNA_domain_or_region', + 'Organism_subdivision', + 'Protein_domain_or_region', 'Anatomical_system', 'Immaterial_anatomical_entity', + 'Developing_anatomical_structure' + ) + self.relations = ( + 'Theme', 'Cause', 'Instrument', 'AtLoc', 'Participant', 'Site', 'ToLoc', 'CSite', 'FromLoc') + elif corpora.corpus_type == 'mlee_en': + self.tags = ( + 'Positive_regulation', 'Blood_vessel_development', 'Negative_regulation', 'Regulation', + 'Planned_process', + 'Localization', 'Development', 'Gene_expression', 'Growth', 'Binding', 'Cell_proliferation', 'Pathway', + 'Death', 'Breakdown', 'Remodeling', 'Catabolism', 'Phosphorylation', 'Transcription', 'Synthesis', + 'DNA_methylation', 'Metabolism', 'Protein_processing', 'Acetylation', 'Translation', + 'Dephosphorylation', + 'Ubiquitination', 'Gene_or_gene_product', 'Cell', 'Drug_or_compound', 'Pathological_formation', + 'Organism', 'Multi-tissue_structure', + 'Tissue', 'Organ', 'Cellular_component', 'Organism_substance', 'DNA_domain_or_region', + 'Organism_subdivision', + 'Protein_domain_or_region', 'Anatomical_system', 'Immaterial_anatomical_entity', + 'Developing_anatomical_structure' + ) + self.exclude_tags = ( + 'Positive_regulation', 'Blood_vessel_development', 'Negative_regulation', 'Planned_process', + 'Regulation', + 'Localization', 'Gene_expression', 'Development', 'Growth', 'Binding', 'Cell_proliferation', 'Pathway', + 'Death', 'Breakdown', 'Remodeling', 'Phosphorylation', 'Catabolism', 'Transcription', 'Synthesis', + 'DNA_methylation', 'Metabolism', 'Protein_processing', 'Dephosphorylation', 'Reproduction', + 'Acetylation', + 'Translation', 'Cell_division', 'Dissociation', 'Ubiquitination') + self.relations = ( + 'Theme', 'Cause', 'Instrument', 'AtLoc', 'Participant', 'Site', 'ToLoc', 'CSite', 'FromLoc') + + # -----------------------------------------PC-------------------------------------------- + elif corpora.corpus_type == 'pc': + self.tags = ( + 'Positive_regulation', 'Negative_regulation', 'Regulation', 'Binding', 'Pathway', 'Phosphorylation', + 'Gene_expression', 'Activation', 'Transport', 'Conversion', 'Localization', 'Inactivation', + 'Transcription', + 'Dissociation', 'Degradation', 'Ubiquitination', 'Acetylation', 'Dephosphorylation', 'Translation', + 'Methylation', + 'Demethylation', 'Deubiquitination', 'Hydroxylation', 'Deacetylation', + 'Gene_or_gene_product', 'Simple_chemical', 'Complex', 'Cellular_component' + ) + self.exclude_tags = ( + ) + self.relations = ( + 'Theme', 'Cause', 'Participant', 'Site', 'Product', 'ToLoc', 'AtLoc', 'FromLoc' + ) + elif corpora.corpus_type == 'pc_tr': + self.tags = ( + 'Positive_regulation', 'Negative_regulation', 'Regulation', 'Binding', 
'Pathway', 'Phosphorylation', + 'Gene_expression', 'Activation', 'Transport', 'Conversion', 'Localization', 'Inactivation', + 'Transcription', + 'Dissociation', 'Degradation', 'Ubiquitination', 'Acetylation', 'Dephosphorylation', 'Translation', + 'Methylation', + 'Demethylation', 'Deubiquitination', 'Hydroxylation', 'Deacetylation' + ) + self.exclude_tags = ( + 'Gene_or_gene_product', 'Simple_chemical', 'Complex', 'Cellular_component' + ) + self.relations = ( + 'Theme', 'Cause', 'Participant', 'Site', 'Product', 'ToLoc', 'AtLoc', 'FromLoc' + ) + elif corpora.corpus_type == 'pc_en': + self.tags = ( + 'Positive_regulation', 'Negative_regulation', 'Regulation', 'Binding', 'Pathway', 'Phosphorylation', + 'Gene_expression', 'Activation', 'Transport', 'Conversion', 'Localization', 'Inactivation', + 'Transcription', + 'Dissociation', 'Degradation', 'Ubiquitination', 'Acetylation', 'Dephosphorylation', 'Translation', + 'Methylation', + 'Demethylation', 'Deubiquitination', 'Hydroxylation', 'Deacetylation', + 'Gene_or_gene_product', 'Simple_chemical', 'Complex', 'Cellular_component' + ) + self.exclude_tags = ( + 'Positive_regulation', 'Negative_regulation', 'Regulation', 'Binding', 'Pathway', 'Phosphorylation', + 'Gene_expression', 'Activation', 'Transport', 'Conversion', 'Localization', 'Inactivation', + 'Transcription', + 'Dissociation', 'Degradation', 'Ubiquitination', 'Acetylation', 'Dephosphorylation', 'Translation', + 'Methylation', + 'Demethylation', 'Deubiquitination', 'Hydroxylation', 'Deacetylation' + ) + self.relations = ( + 'Theme', 'Cause', 'Participant', 'Site', 'Product', 'ToLoc', 'AtLoc', 'FromLoc' + ) + # -----------------------------------------EPI-------------------------------------------- + + elif corpora.corpus_type == 'epi': + self.tags = ( + 'Methylation', 'Glycosylation', 'Acetylation', 'Ubiquitination', 'DNA_methylation', 'Catalysis', + 'Hydroxylation', 'Phosphorylation', 'Deacetylation', 'Deglycosylation', 'DNA_demethylation', + 'Deubiquitination', 'Demethylation', 'Dephosphorylation', 'Dehydroxylation', + 'Protein', 'Entity' + ) + self.exclude_tags = ( + ) + self.relations = ( + 'Theme', 'Site', 'Cause', 'Contextgene', 'Sidechain' + ) + elif corpora.corpus_type == 'epi_tr': + self.tags = ( + 'Methylation', 'Glycosylation', 'Acetylation', 'Ubiquitination', 'DNA_methylation', 'Catalysis', + 'Hydroxylation', 'Phosphorylation', 'Deacetylation', 'Deglycosylation', 'DNA_demethylation', + 'Deubiquitination', 'Demethylation', 'Dephosphorylation', 'Dehydroxylation', + 'Protein', 'Entity' + ) + self.exclude_tags = ( + 'Protein', 'Entity' + ) + self.relations = ( + 'Theme', 'Site', 'Cause', 'Contextgene', 'Sidechain' + ) + elif corpora.corpus_type == 'epi_en': + self.tags = ( + 'Methylation', 'Glycosylation', 'Acetylation', 'Ubiquitination', 'DNA_methylation', 'Catalysis', + 'Hydroxylation', 'Phosphorylation', 'Deacetylation', 'Deglycosylation', 'DNA_demethylation', + 'Deubiquitination', 'Demethylation', 'Dephosphorylation', 'Dehydroxylation', + 'Protein', 'Entity' + ) + self.exclude_tags = ( + 'Methylation', 'Glycosylation', 'Acetylation', 'Ubiquitination', 'DNA_methylation', 'Catalysis', + 'Hydroxylation', 'Phosphorylation', 'Deacetylation', 'Deglycosylation', 'DNA_demethylation', + 'Deubiquitination', 'Demethylation', 'Dephosphorylation', 'Dehydroxylation' + ) + self.relations = ( + 'Theme', 'Site', 'Cause', 'Contextgene', 'Sidechain' + ) + + # -----------------------------------------EZCAT-------------------------------------------- + elif corpora.corpus_type == "ezcat": + 
self.tags = ( + "Activation", + "BondFormation", + "Cleavage", + "ConformationalChange", + "CouplingReaction", + "Deprotonation", + "Destabilisation", + "ElectrophilicAttack", + "HybridisationChange", + "Inactivation", + "Interaction", + "Modulation", + "NucleophilicAttack", + "Others", + "Protonation", + "Release", + "Stabilisation", + "UncouplingReaction", + "WholeReaction", + "AminoAcid", + "Cofactor", + "EntityProperty", + "Enzyme", + "FunctionalGroup", + "MethodCue", + "NegationCue", + "OtherCompound", + "SpeculationCue", + ) + self.exclude_tags = () + self.relations = ( + "Agent", + "Cue", + "EndPoint", + "InitialPoint", + "Means", + "Theme", + ) + elif corpora.corpus_type == "ezcat_tr": + self.tags = ( + "Activation", + "BondFormation", + "Cleavage", + "ConformationalChange", + "CouplingReaction", + "Deprotonation", + "Destabilisation", + "ElectrophilicAttack", + "HybridisationChange", + "Inactivation", + "Interaction", + "Modulation", + "NucleophilicAttack", + "Others", + "Protonation", + "Release", + "Stabilisation", + "UncouplingReaction", + "WholeReaction", + "AminoAcid", + "Cofactor", + "EntityProperty", + "Enzyme", + "FunctionalGroup", + "MethodCue", + "NegationCue", + "OtherCompound", + "SpeculationCue", + ) + self.exclude_tags = ( + "AminoAcid", + "Cofactor", + "EntityProperty", + "Enzyme", + "FunctionalGroup", + "MethodCue", + "NegationCue", + "OtherCompound", + "SpeculationCue", + ) + self.relations = ( + "Agent", + "Cue", + "EndPoint", + "InitialPoint", + "Means", + "Theme", + ) + elif corpora.corpus_type == "ezcat_en": + self.tags = ( + "Activation", + "BondFormation", + "Cleavage", + "ConformationalChange", + "CouplingReaction", + "Deprotonation", + "Destabilisation", + "ElectrophilicAttack", + "HybridisationChange", + "Inactivation", + "Interaction", + "Modulation", + "NucleophilicAttack", + "Others", + "Protonation", + "Release", + "Stabilisation", + "UncouplingReaction", + "WholeReaction", + "AminoAcid", + "Cofactor", + "EntityProperty", + "Enzyme", + "FunctionalGroup", + "MethodCue", + "NegationCue", + "OtherCompound", + "SpeculationCue", + ) + self.exclude_tags = ( + "Activation", + "BondFormation", + "Cleavage", + "ConformationalChange", + "CouplingReaction", + "Deprotonation", + "Destabilisation", + "ElectrophilicAttack", + "HybridisationChange", + "Inactivation", + "Interaction", + "Modulation", + "NucleophilicAttack", + "Others", + "Protonation", + "Release", + "Stabilisation", + "UncouplingReaction", + "WholeReaction", + ) + self.relations = ( + "Agent", + "Cue", + "EndPoint", + "InitialPoint", + "Means", + "Theme", + ) + + # -----------------------------------------LCGENES-------------------------------------------- + + elif corpora.corpus_type == 'lcgenes': + self.tags = ( + 'MMLite', 'GGPs', 'Disorder', 'Subject', 'Pharmacological_substance', + 'Cell', 'Method_cue', 'Anatomical_entity', 'Cell_component', 'Organic_compound_other', + 'Inorganic_compound', + 'Artificial_process', 'Molecular_function', 'Biological_process', 'Cellular_process', 'Regulation', + ) + self.exclude_tags = ( + ) + self.relations = ( + ) + elif corpora.corpus_type == 'lcgenes_tr': + self.tags = ( + 'MMLite', 'GGPs', 'Disorder', 'Subject', 'Pharmacological_substance', + 'Cell', 'Method_cue', 'Anatomical_entity', 'Cell_component', 'Organic_compound_other', + 'Inorganic_compound', + 'Artificial_process', 'Molecular_function', 'Biological_process', 'Cellular_process', 'Regulation', + ) + self.exclude_tags = ( + 'MMLite', 'GGPs', 'Disorder', 'Subject', 'Pharmacological_substance', + 'Cell', 
'Method_cue', 'Anatomical_entity', 'Cell_component', 'Organic_compound_other', + 'Inorganic_compound', + ) + self.relations = ( + ) + elif corpora.corpus_type == 'lcgenes_en': + self.tags = ( + 'MMLite', 'GGPs', 'Disorder', 'Subject', 'Pharmacological_substance', + 'Cell', 'Method_cue', 'Anatomical_entity', 'Cell_component', 'Organic_compound_other', + 'Inorganic_compound', + 'Artificial_process', 'Molecular_function', 'Biological_process', 'Cellular_process', 'Regulation', + ) + self.exclude_tags = ( + 'Artificial_process', 'Molecular_function', 'Biological_process', 'Cellular_process', 'Regulation', + ) + self.relations = ( + ) + # -----------------------------------------IPF-------------------------------------------- + elif corpora.corpus_type == 'ipf': + self.tags = ( + 'MMLite', 'GGPs', 'Disorder', 'Subject', 'Anatomical_entity', 'Cell', 'Method_cue', + 'Pharmacological_substance', 'Entity_Property', 'Organic_compound_other', 'Cell_component', + 'Inorganic_compound', + 'Artificial_process', 'Positive_regulation', 'Gene_expression', 'Negative_regulation', + 'Cellular_process', 'Biological_process', 'Pathway', 'Molecular_function', 'Regulation', 'Migration', + 'Localization', + ) + self.exclude_tags = ( + ) + self.relations = ( + 'Theme', 'Participant', 'Cause', 'disorder', 'atLoc', + ) + elif corpora.corpus_type == 'ipf_tr': + self.tags = ( + 'MMLite', 'GGPs', 'Disorder', 'Subject', 'Anatomical_entity', 'Cell', 'Method_cue', + 'Pharmacological_substance', 'Entity_Property', 'Organic_compound_other', 'Cell_component', + 'Inorganic_compound', + 'Artificial_process', 'Positive_regulation', 'Gene_expression', 'Negative_regulation', + 'Cellular_process', 'Biological_process', 'Pathway', 'Molecular_function', 'Regulation', 'Migration', + 'Localization', + ) + self.exclude_tags = ( + 'MMLite', 'GGPs', 'Disorder', 'Subject', 'Anatomical_entity', 'Cell', 'Method_cue', + 'Pharmacological_substance', 'Entity_Property', 'Organic_compound_other', 'Cell_component', + 'Inorganic_compound', + ) + self.relations = ( + 'Theme', 'Participant', 'Cause', 'disorder', 'atLoc', + ) + elif corpora.corpus_type == 'ipf_en': + self.tags = ( + 'MMLite', 'GGPs', 'Disorder', 'Subject', 'Anatomical_entity', 'Cell', 'Method_cue', + 'Pharmacological_substance', 'Entity_Property', 'Organic_compound_other', 'Cell_component', + 'Inorganic_compound', + 'Artificial_process', 'Positive_regulation', 'Gene_expression', 'Negative_regulation', + 'Cellular_process', 'Biological_process', 'Pathway', 'Molecular_function', 'Regulation', 'Migration', + 'Localization', + ) + self.exclude_tags = ( + 'Artificial_process', 'Positive_regulation', 'Gene_expression', 'Negative_regulation', + 'Cellular_process', 'Biological_process', 'Pathway', 'Molecular_function', 'Regulation', 'Migration', + 'Localization', + ) + self.relations = ( + 'Theme', 'Participant', 'Cause', 'disorder', 'atLoc', + ) + + # -----------------------------------------GPCR-------------------------------------------- + elif corpora.corpus_type == 'gpcr': + self.tags = ( + 'Protein', 'GPCR', 'GPCR-ligand', 'Chemical', 'Cell', 'G-protein', 'Disease', + 'Cell-component', 'Organism', 'Anatomy', 'Entity', + 'Regulation', 'Positive_regulation', 'Biological_process', 'Negative_regulation', 'Pathway', 'Binding', + 'Gene_expression', 'Artificial_process', 'Localization', 'Phosphorylation', 'Internalization', + 'Biosynthesis', 'Conformational-change', 'Degradation', 'Conversion', 'Transportation', 'Dissociation', + 'Transcription', 'Dephosphorylation', 'Translation', + ) + 
self.exclude_tags = ( + ) + self.relations = ( + 'Theme', 'Cause', 'Participant', 'AtLoc', 'Site', 'Product', 'ToLoc', 'FromLoc', + ) + elif corpora.corpus_type == 'gpcr_tr': + self.tags = ( + 'Protein', 'GPCR', 'GPCR-ligand', 'Chemical', 'Cell', 'G-protein', 'Disease', + 'Cell-component', 'Organism', 'Anatomy', 'Entity', + 'Regulation', 'Positive_regulation', 'Biological_process', 'Negative_regulation', 'Pathway', 'Binding', + 'Gene_expression', 'Artificial_process', 'Localization', 'Phosphorylation', 'Internalization', + 'Biosynthesis', 'Conformational-change', 'Degradation', 'Conversion', 'Transportation', 'Dissociation', + 'Transcription', 'Dephosphorylation', 'Translation', + ) + self.exclude_tags = ( + 'Protein', 'GPCR', 'GPCR-ligand', 'Chemical', 'Cell', 'G-protein', 'Disease', + 'Cell-component', 'Organism', 'Anatomy', 'Entity', + ) + self.relations = ( + 'Theme', 'Cause', 'Participant', 'AtLoc', 'Site', 'Product', 'ToLoc', 'FromLoc', + ) + elif corpora.corpus_type == 'gpcr_en': + self.tags = ( + 'Protein', 'GPCR', 'GPCR-ligand', 'Chemical', 'Cell', 'G-protein', 'Disease', + 'Cell-component', 'Organism', 'Anatomy', 'Entity', + 'Regulation', 'Positive_regulation', 'Biological_process', 'Negative_regulation', 'Pathway', 'Binding', + 'Gene_expression', 'Artificial_process', 'Localization', 'Phosphorylation', 'Internalization', + 'Biosynthesis', 'Conformational-change', 'Degradation', 'Conversion', 'Transportation', 'Dissociation', + 'Transcription', 'Dephosphorylation', 'Translation', + ) + self.exclude_tags = ( + 'Regulation', 'Positive_regulation', 'Biological_process', 'Negative_regulation', 'Pathway', 'Binding', + 'Gene_expression', 'Artificial_process', 'Localization', 'Phosphorylation', 'Internalization', + 'Biosynthesis', 'Conformational-change', 'Degradation', 'Conversion', 'Transportation', 'Dissociation', + 'Transcription', 'Dephosphorylation', 'Translation', + ) + self.relations = ( + 'Theme', 'Cause', 'Participant', 'AtLoc', 'Site', 'Product', 'ToLoc', 'FromLoc', + ) + + # -----------------------------------------GE04-------------------------------------------- + elif corpora.corpus_type == 'genia04': + self.tags = ( + "protein", + "DNA", + "cell_type", + "cell_line", + "RNA", + ) + self.exclude_tags = ( + ) + self.relations = ( + ) + + self.actual_tags = (tag for tag in self.tags if tag not in self.exclude_tags) # Not use set to keep order + for g, s in corpora.docs: + evaluator = SingleEvaluator(g, s, 2, mode, tag_type, verbose=verbose, exclude_tags=self.exclude_tags) + for target in ('tags', 'relations'): + for score in ('tp', 'fp', 'fn'): + self.scores[target][score] += evaluator.scores[target][score] + measures = Measures(tp=evaluator.scores[target]['tp'], + fp=evaluator.scores[target]['fp'], + fn=evaluator.scores[target]['fn'], + tn=evaluator.scores[target]['tn']) + for score in ('precision', 'recall', 'f1'): + fn = getattr(measures, score) + self.scores[target]['macro'][score] += fn() + + for target in ('tags', 'relations'): + # Normalization + for key in self.scores[target]['macro'].keys(): + self.scores[target]['macro'][key] = \ + self.scores[target]['macro'][key] / len(corpora.docs) + + measures = Measures(tp=self.scores[target]['tp'], + fp=self.scores[target]['fp'], + fn=self.scores[target]['fn'], + tn=self.scores[target]['tn']) + for key in self.scores[target]['micro'].keys(): + fn = getattr(measures, key) + self.scores[target]['micro'][key] = fn() + + +def evaluate(corpora, mode='strict', verbose=False): + """Run the evaluation by considering only files in the 
two folders.""" + assert mode in ('strict', 'lenient') + evaluator_s = MultipleEvaluator(corpora, verbose) + if corpora.track == 1: + macro_f1, macro_auc = 0, 0 + print('{:*^96}'.format(' TRACK 1 ')) + print('{:20} {:-^30} {:-^22} {:-^14}'.format('', ' met ', + ' not met ', + ' overall ')) + print('{:20} {:6} {:6} {:6} {:6} {:6} {:6} {:6} {:6} {:6}'.format( + '', 'Prec.', 'Rec.', 'Speci.', 'F(b=1)', 'Prec.', 'Rec.', 'F(b=1)', 'F(b=1)', 'AUC')) + for tag in evaluator_s.tags: + print( + '{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f}'.format( + tag.capitalize(), + evaluator_s.scores[(tag, 'met', 'p')], + evaluator_s.scores[(tag, 'met', 'r')], + evaluator_s.scores[(tag, 'met', 'specificity')], + evaluator_s.scores[(tag, 'met', 'f1')], + evaluator_s.scores[(tag, 'not met', 'p')], + evaluator_s.scores[(tag, 'not met', 'r')], + evaluator_s.scores[(tag, 'not met', 'f1')], + (evaluator_s.scores[(tag, 'met', 'f1')] + evaluator_s.scores[(tag, 'not met', 'f1')]) / 2, + evaluator_s.scores[(tag, 'met', 'auc')])) + macro_f1 += (evaluator_s.scores[(tag, 'met', 'f1')] + evaluator_s.scores[(tag, 'not met', 'f1')]) / 2 + macro_auc += evaluator_s.scores[(tag, 'met', 'auc')] + print('{:20} {:-^30} {:-^22} {:-^14}'.format('', '', '', '')) + m = Measures(tp=evaluator_s.values['met']['tp'], + fp=evaluator_s.values['met']['fp'], + fn=evaluator_s.values['met']['fn'], + tn=evaluator_s.values['met']['tn']) + nm = Measures(tp=evaluator_s.values['not met']['tp'], + fp=evaluator_s.values['not met']['fp'], + fn=evaluator_s.values['not met']['fn'], + tn=evaluator_s.values['not met']['tn']) + print( + '{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f}'.format( + 'Overall (micro)', m.precision(), m.recall(), m.specificity(), + m.f1(), nm.precision(), nm.recall(), nm.f1(), + (m.f1() + nm.f1()) / 2, m.auc())) + print( + '{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f}'.format( + 'Overall (macro)', + evaluator_s.scores[('macro', 'met', 'p')], + evaluator_s.scores[('macro', 'met', 'r')], + evaluator_s.scores[('macro', 'met', 'specificity')], + evaluator_s.scores[('macro', 'met', 'f1')], + evaluator_s.scores[('macro', 'not met', 'p')], + evaluator_s.scores[('macro', 'not met', 'r')], + evaluator_s.scores[('macro', 'not met', 'f1')], + macro_f1 / len(evaluator_s.tags), + evaluator_s.scores[('macro', 'met', 'auc')])) + print() + print('{:>20} {:^74}'.format('', ' {} files found '.format(len(corpora.docs)))) + else: + evaluator_l = MultipleEvaluator(corpora, mode='lenient', verbose=verbose) + print('{:*^70}'.format(' TRACK 2 ')) + print('{:20} {:-^22} {:-^22}'.format('', ' strict ', ' lenient ')) + print('{:20} {:6} {:6} {:6} {:6} {:6} {:6}'.format('', 'Prec.', + 'Rec.', + 'F(b=1)', + 'Prec.', + 'Rec.', + 'F(b=1)')) + for tag in evaluator_s.actual_tags: + evaluator_tag_s = MultipleEvaluator(corpora, tag, verbose=verbose) + evaluator_tag_l = MultipleEvaluator(corpora, tag, mode='lenient', verbose=verbose) + print( + '{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:>5} {:>5} {:>5} {:>5} {:>5} {:>5}'.format( + tag.capitalize(), + evaluator_tag_s.scores['tags']['micro']['precision'], + evaluator_tag_s.scores['tags']['micro']['recall'], + evaluator_tag_s.scores['tags']['micro']['f1'], + evaluator_tag_l.scores['tags']['micro']['precision'], + evaluator_tag_l.scores['tags']['micro']['recall'], + evaluator_tag_l.scores['tags']['micro']['f1'], + evaluator_tag_s.scores['tags']['tp'] + + 
evaluator_tag_s.scores['tags']['fp'], + evaluator_tag_s.scores['tags']['tp'] + + evaluator_tag_s.scores['tags']['fn'], + evaluator_tag_s.scores['tags']['tp'], + evaluator_tag_l.scores['tags']['tp'] + + evaluator_tag_l.scores['tags']['fp'], + evaluator_tag_l.scores['tags']['tp'] + + evaluator_tag_l.scores['tags']['fn'], + evaluator_tag_l.scores['tags']['tp'])) + print('{:>20} {:-^48}'.format('', '')) + print('{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f}'.format( + 'Overall (micro)', + evaluator_s.scores['tags']['micro']['precision'], + evaluator_s.scores['tags']['micro']['recall'], + evaluator_s.scores['tags']['micro']['f1'], + evaluator_l.scores['tags']['micro']['precision'], + evaluator_l.scores['tags']['micro']['recall'], + evaluator_l.scores['tags']['micro']['f1'])) + print('{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f}'.format( + 'Overall (macro)', + evaluator_s.scores['tags']['macro']['precision'], + evaluator_s.scores['tags']['macro']['recall'], + evaluator_s.scores['tags']['macro']['f1'], + evaluator_l.scores['tags']['macro']['precision'], + evaluator_l.scores['tags']['macro']['recall'], + evaluator_l.scores['tags']['macro']['f1'])) + print() + + print('{:*^70}'.format(' RELATIONS ')) + for rel in evaluator_s.relations: + evaluator_tag_s = MultipleEvaluator(corpora, rel, mode='strict', verbose=verbose) + evaluator_tag_l = MultipleEvaluator(corpora, rel, mode='lenient', verbose=verbose) + print( + '{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:>5} {:>5} {:>5} {:>5} {:>5} {:>5}'.format( + '{}'.format(rel), + evaluator_tag_s.scores['relations']['micro']['precision'], + evaluator_tag_s.scores['relations']['micro']['recall'], + evaluator_tag_s.scores['relations']['micro']['f1'], + evaluator_tag_l.scores['relations']['micro']['precision'], + evaluator_tag_l.scores['relations']['micro']['recall'], + evaluator_tag_l.scores['relations']['micro']['f1'], + evaluator_tag_s.scores['relations']['tp'] + + evaluator_tag_s.scores['relations']['fp'], + evaluator_tag_s.scores['relations']['tp'] + + evaluator_tag_s.scores['relations']['fn'], + evaluator_tag_s.scores['relations']['tp'], + evaluator_tag_l.scores['relations']['tp'] + + evaluator_tag_l.scores['relations']['fp'], + evaluator_tag_l.scores['relations']['tp'] + + evaluator_tag_l.scores['relations']['fn'], + evaluator_tag_l.scores['relations']['tp'])) + print('{:>20} {:-^48}'.format('', '')) + print('{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f}'.format( + 'Overall (micro)', + evaluator_s.scores['relations']['micro']['precision'], + evaluator_s.scores['relations']['micro']['recall'], + evaluator_s.scores['relations']['micro']['f1'], + evaluator_l.scores['relations']['micro']['precision'], + evaluator_l.scores['relations']['micro']['recall'], + evaluator_l.scores['relations']['micro']['f1'])) + print('{:>20} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f} {:<5.4f}'.format( + 'Overall (macro)', + evaluator_s.scores['relations']['macro']['precision'], + evaluator_s.scores['relations']['macro']['recall'], + evaluator_s.scores['relations']['macro']['f1'], + evaluator_l.scores['relations']['macro']['precision'], + evaluator_l.scores['relations']['macro']['recall'], + evaluator_l.scores['relations']['macro']['f1'])) + print() + print('{:20}{:^48}'.format('', ' {} files found '.format(len(corpora.docs)))) + + +class Corpora(object): + + def __init__(self, corpus_type, folder1, folder2, track_num): + extensions = {1: '*.xml', 2: '*.ann'} + file_ext = extensions[track_num] + self.track = 
track_num + self.folder1 = folder1 + self.folder2 = folder2 + self.corpus_type = corpus_type + files1 = set([os.path.basename(f) for f in glob.glob( + os.path.join(folder1, file_ext))]) + # print(files1) + files2 = set([os.path.basename(f) for f in glob.glob( + os.path.join(folder2, file_ext))]) + # print(files2) + common_files = files1 & files2 # intersection + if not common_files: + print('ERROR: None of the files match.') + else: + if files1 - common_files: + print('Files skipped in {}:'.format(self.folder1)) + print(', '.join(sorted(list(files1 - common_files)))) + if files2 - common_files: + print('Files skipped in {}:'.format(self.folder2)) + print(', '.join(sorted(list(files2 - common_files)))) + self.docs = [] + for file in common_files: + if track_num == 1: + g = RecordTrack1(os.path.join(self.folder1, file)) + s = RecordTrack1(os.path.join(self.folder2, file)) + else: + g = RecordTrack2(os.path.join(self.folder1, file)) + s = RecordTrack2(os.path.join(self.folder2, file)) + self.docs.append((g, s)) + + +def main(corpus_type, f1, f2, track, verbose): + """Where the magic begins.""" + corpora = Corpora(corpus_type, f1, f2, track) + if corpora.docs: + evaluate(corpora, verbose=verbose) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='n2c2: Evaluation script for Track 2') + parser.add_argument('folder1', help='First data folder path (gold)') + parser.add_argument('folder2', help='Second data folder path (system)') + parser.add_argument('--ner-eval-corpus', dest='corpus_type', type=str, required=True, + help='ace / cg / cg_tr / ace_tr') + args = parser.parse_args() + main(args.corpus_type, os.path.abspath(args.folder1), os.path.abspath(args.folder2), 2, False) diff --git a/loader/prepData/brat.py b/loader/prepData/brat.py index 42f8e3f..b2ba019 100644 --- a/loader/prepData/brat.py +++ b/loader/prepData/brat.py @@ -3,33 +3,80 @@ import glob import collections from collections import OrderedDict +import os def brat_loader(files_fold, params): file_list = glob.glob(files_fold + '*' + '.txt') + triggers = OrderedDict() entities = OrderedDict() - + relations = OrderedDict() + events = OrderedDict() sentences = OrderedDict() for filef in sorted(file_list): if filef.split("/")[-1].startswith("."): continue - filename = filef.split('/')[-1].split('.txt')[0] ffolder = '/'.join(filef.split('/')[:-1]) + '/' + + + # store data for each document + ftriggers = OrderedDict() fentities = OrderedDict() + frelations = OrderedDict() + fevents = OrderedDict() + + idsTR = [] + typesTR = [] + infoTR = OrderedDict() + termsTR = [] idsT = [] typesT = [] infoT = OrderedDict() termsT = [] + idsR = [] + typesR = [] + infoR = OrderedDict() + + idsE = [] + infoE = OrderedDict() + infoM = OrderedDict() + + # # check empty file, otherwise, create an empty file to fix bug pipeline (temporarily) + # filepath = ffolder + filename + '.ann' + # if not os.path.isfile(filepath): + # with open(filepath, 'w') as f: + # print('EMPTY FILE: ', filepath) + with open(ffolder + filename + '.ann', encoding="UTF-8") as infile: for line in infile: - if line.startswith('T'): + if line.startswith('TR'): + line = line.rstrip().split('\t') + trId = line[0] + tr1 = line[1].split() + trType = tr1[0] + pos1 = tr1[1] + pos2 = tr1[2] + text = line[2] + + idsTR.append(trId) + typesTR.append(trType) + trigger_info = OrderedDict() + trigger_info['id'] = trId + trigger_info['type'] = trType + trigger_info['pos1'] = pos1 + trigger_info['pos2'] = pos2 + trigger_info['text'] = text + infoTR[trId] = trigger_info + 
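# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. The new 'TR'
# branch in brat_loader above splits a brat trigger annotation of this shape;
# the example line is an assumption.
_line = "TR2\tPositive_regulation 161 171\tactivation"
_tr_id, _span, _text = _line.rstrip().split('\t')
_tr_type, _pos1, _pos2 = _span.split()
assert [_tr_id, _tr_type, _pos1, _pos2, _text] == \
    ['TR2', 'Positive_regulation', '161', '171', 'activation']
# ---------------------------------------------------------------------------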
termsTR.append([trId, trType, pos1, pos2, text]) + + elif line.startswith('T'): line = line.rstrip().split('\t') eid = line[0] e1 = line[1].split() @@ -49,7 +96,98 @@ def brat_loader(files_fold, params): infoT[eid] = ent_info termsT.append([eid, etype, pos1, pos2, text]) + elif line.startswith('R'): + line = line.rstrip().split('\t') + idR = line[0] + typeR = line[1].split()[0] + typeR = ''.join([i for i in typeR if not i.isdigit()]) + args = line[1].split()[1:] + arg1id = args[0].split(':')[1] + arg2id = args[1].split(':')[1] + + trig2 = False + trig1 = False + if arg1id.startswith('TR') and arg2id.startswith('TR'): + trig2 = True + trig1 = True + elif arg1id.startswith('TR'): + trig1 = True + + r_info = OrderedDict() + r_info['id'] = idR + r_info['type'] = typeR + r_info['arg1id'] = arg1id + r_info['arg2id'] = arg2id + r_info['2trigger'] = trig2 + r_info['1trigger'] = trig1 + + idsR.append(idR) + typesR.append(typeR) + infoR[idR] = r_info + + elif line.startswith('E'): + line = line.rstrip().split('\t') + idE = line[0] + args = line[1].split() + tr1 = args[0].split(':') + trType = tr1[0] + trId = tr1[1] + args_num = len(args) - 1 + + nestedEv_ = [] + args2 = [] + args_ids = [] + for xx, arg in enumerate(args[1:]): + role, eid = arg.split(':') + role = ''.join([i for i in role if not i.isdigit()]) + args2.append((role, eid)) + args_ids.append(eid) + if eid.startswith('E'): + nestedEv_.append(eid) + + zeroArg = False + if len(args2) == 0: + args2 = [()] + zeroArg = True + + if len(nestedEv_) > 0: + evArg = True + else: + evArg = False + + idsE.append(idE) + e_info = OrderedDict() + e_info['id'] = idE + e_info['trid'] = trId + e_info['trtype'] = trType + e_info['args_num'] = args_num + e_info['args_data'] = args2 + e_info['is_zeroArg'] = zeroArg + e_info['is_nested_ev'] = evArg + e_info['nested_events'] = nestedEv_ + e_info['is_flat_ev'] = len(nestedEv_) == 0 + e_info['args_ids'] = args_ids + + e_info['modality'] = 'non-modality' + + infoE[idE] = e_info + + elif line.startswith('M'): + line = line.rstrip().split('\t') + modals = line[1].split(' ') + idev = modals[1] + modal_type = modals[0] + infoM[idev] = modal_type + + typesTR2 = dict(collections.Counter(typesTR)) typesT2 = dict(collections.Counter(typesT)) + typesR2 = dict(collections.Counter(typesR)) + + ftriggers['data'] = infoTR + ftriggers['types'] = typesTR + ftriggers['counted_types'] = typesTR2 + ftriggers['ids'] = idsTR + ftriggers['terms'] = termsTR fentities['data'] = infoT fentities['types'] = typesT @@ -57,12 +195,26 @@ def brat_loader(files_fold, params): fentities['ids'] = idsT fentities['terms'] = termsT - # check empty entities - if len(idsT) == 0 and not params['raw_text']: + frelations['data'] = infoR + frelations['types'] = typesR + frelations['ids'] = idsR + frelations['counted_types'] = typesR2 + + for evid, modal_type in infoM.items(): + infoE[evid]['modality'] = modal_type + + fevents['data'] = infoE + fevents['ids'] = idsE + + # check empty + if len(idsT) == len(idsTR) == 0: continue else: entities[filename] = fentities + triggers[filename] = ftriggers + relations[filename] = frelations + events[filename] = fevents lowerc = params['lowercase'] with open(ffolder + filename + '.txt', encoding="UTF-8") as infile: @@ -75,4 +227,4 @@ def brat_loader(files_fold, params): lines.append(line) sentences[filename] = lines - return entities, sentences + return triggers, entities, relations, events, sentences diff --git a/loader/prepData/entity.py b/loader/prepData/entity.py index 8f99437..0fdb3e9 100644 --- 
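# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. An event ('E')
# line as parsed above: the first field is Type:TriggerId, the remaining
# fields are Role:ArgId pairs whose trailing digits (Theme2, Cause3, ...) are
# stripped, and any argument id starting with 'E' marks a nested event. The
# example line is an assumption.
_line = "E3\tRegulation:TR7 Theme:E1 Cause2:T4"
_ev_id, _fields = _line.rstrip().split('\t')
_args = _fields.split()
_tr_type, _tr_id = _args[0].split(':')
_pairs = []
for _arg in _args[1:]:
    _role, _target = _arg.split(':')
    _role = ''.join(c for c in _role if not c.isdigit())   # Cause2 -> Cause
    _pairs.append((_role, _target))
_is_nested = any(t.startswith('E') for _, t in _pairs)     # E1 => nested
assert _pairs == [('Theme', 'E1'), ('Cause', 'T4')] and _is_nested
# ---------------------------------------------------------------------------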
a/loader/prepData/entity.py +++ b/loader/prepData/entity.py @@ -28,14 +28,17 @@ def process_etypes(entities0): return entities1 -def process_tags(entities1): +def process_tags(entities1, triggers1): typesT = entities1['types'] + typesTR = triggers1['types'] + types2 = typesT + typesTR tags = [] + tagsTR = [] tags2types = OrderedDict() tags2types['O'] = 'O' - for type in typesT: + for type in types2: btag = 'B-' + type itag = 'I-' + type tags.append(btag) @@ -43,11 +46,17 @@ def process_tags(entities1): tags2types[btag] = type tags2types[itag] = type + if type in typesTR: + tagsTR.append(btag) + tagsTR.append(itag) + tags0 = OrderedDict() - tags0['types'] = typesT + tags0['types'] = types2 + tags0['typesTR'] = typesTR tags0['typesT'] = typesT tags0['tags'] = tags tags0['tags2types'] = tags2types + tags0['tagsTR'] = tagsTR return tags0 @@ -147,8 +156,9 @@ def spliter(line, _len=len): return offsets -def process_entities(entities1, sentences1, params, dirpath): +def process_entities(entities1, triggers1, sentences1, params, dirpath): entities0 = entities1['pmids'] + triggers0 = triggers1['pmids'] input0 = OrderedDict() @@ -157,11 +167,15 @@ def process_entities(entities1, sentences1, params, dirpath): for pmid in entities0: entities = entities0[pmid] + triggers = triggers0[pmid] sentences = sentences0[pmid] terms = entities['terms'] + terms.extend(triggers['terms']) nest_level, terms = count_nest_level(terms, params) + # nest_level, terms = utils.count_nest_level(terms) + # terms, file_discard_count = utils.dicard_invalid_nes(terms, sentences) levels.append(nest_level) abst_text = '\n'.join([sent['sentence'] for sent in sentences]) @@ -177,14 +191,31 @@ def process_entities(entities1, sentences1, params, dirpath): init_char = next_char spans.append((init_char, next_char)) + # doc_data = [] + # tags_ = [] + # terms_ = [] for xx, sentence in enumerate(sentences): + # offsets, words = calculate_offset(sentences, xx) offsets = sentence['offsets'] + # words = sentence['words'] + # chars = sentence['chars'] + # sent = sentence['sentence'] + # nner + # tags, terms_sentence = utils.assign_label(offsets, terms) tags, tags_terms, terms_sentence = assign_label(offsets, terms) + # tags_.append(tags) + # terms_.append(terms_sentence) + + # check sentence has no entity + # if len(terms_sentence) == 0: + # print('NO ENTITY: ', pmid, xx, sentence['sentence']) + + # tags_.extend([tag for level in tags for tag in level]) # for nested sentence['tags'] = tags sentence['terms'] = terms_sentence - + # nner sentence['tags_terms'] = tags_terms eids = [] @@ -196,17 +227,25 @@ def process_entities(entities1, sentences1, params, dirpath): for eid in eids: if eid in entities['data']: readable_ents[eid] = entities['data'][eid] + else: + readable_ents[eid] = triggers['data'][eid] + # sentence['readable_ents'] = readable_ents + # offsets2 span = spans[xx] - + # offs2 = [] + # etypes2 = [] for x, id_ in enumerate(eids): # for every entity if it belongs to sentence span ent = readable_ents[id_] b = int(ent['pos1']) e = int(ent['pos2']) + # b, e = offs[x] if (span[0] <= b <= span[1]) and (span[0] <= e <= span[1]): b2 = b - span[0] e2 = e - span[0] + # offs2.append([b2, e2]) # + # etypes2.append(ent['type']) ent['offs2'] = [b2, e2] else: print("SKIP ENTITY: " + str(b) + " --- " + str(e)) @@ -215,6 +254,7 @@ def process_entities(entities1, sentences1, params, dirpath): tokens = spliter( sentence['sentence']) # we have the tokens of the sentence and their corresponding offsets + tokensN = [tok for tok, b, e in tokens] for eid in 
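# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. process_tags
# above now builds one BIO tag inventory over entity AND trigger types; the
# toy type names are assumptions.
_types = ['Protein', 'Binding']            # merged entity + trigger types
_tags = [p + t for t in _types for p in ('B-', 'I-')]
_tags2types = {'O': 'O', **{tag: tag[2:] for tag in _tags}}
assert _tags == ['B-Protein', 'I-Protein', 'B-Binding', 'I-Binding']
# ---------------------------------------------------------------------------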
eids: if "offs2" not in readable_ents[eid]: @@ -250,6 +290,7 @@ def process_entities(entities1, sentences1, params, dirpath): pad_label = [['O'] * len(tags[0])] tags.extend(pad_label * pad_level) + # nner tags_terms = sentence['tags_terms'] pad_label = [['O'] * len(tags_terms[0])] tags_terms.extend(pad_label * pad_level) @@ -261,10 +302,31 @@ def process_entities(entities1, sentences1, params, dirpath): return input0 +def entity_tags(dico): + """ + Create a dictionary and a mapping of tags + """ + id_to_tag = {0: 'O'} + id_to_type = {0: 'O'} + # id_to_tag = {} + # id_to_type = {} + for i, (k, v) in enumerate(dico.items()): + # if v != 'O': + id_to_tag[2 * i + 1] = 'I-' + v + id_to_tag[2 * i + 2] = 'B-' + v + id_to_type[2 * i + 2] = v + + tag_to_id = {v: k for k, v in id_to_tag.items()} + type_to_id = {v: k for k, v in id_to_type.items()} + + return id_to_tag, tag_to_id, id_to_type, type_to_id + + def extract_entities(sw_sentence, tag2id_mapping, id2tag_mapping, nn_mapping): # For several edge cases max_depth = max(len(tags) for _, tags, _ in sw_sentence) + # for sentence in sentences: (not using loop) entities = defaultdict(list) terms = defaultdict(list) @@ -279,25 +341,27 @@ def extract_entities(sw_sentence, tag2id_mapping, id2tag_mapping, nn_mapping): try: tags = np.asarray( - [ - [tag2id_mapping[tag] for tag in tags + ["O"] * max_depth][ - :max_depth - ] - for _, tags, tags_terms in sw_sentence + [ + # bug: original + # [tag2id_mapping[tag] if tag in tag2id_mapping else tag2id_mapping["O"] for tag in tags + ["O"] * max_depth][ + [tag2id_mapping[tag] for tag in tags + ["O"] * max_depth][ + :max_depth ] + for _, tags, tags_terms in sw_sentence + ] ).T except KeyError as err: tags = np.asarray( - [ - [tag2id_mapping[tag] if tag in tag2id_mapping else tag2id_mapping["O"] for tag in - tags + ["O"] * max_depth][ - :max_depth - ] - for _, tags, tags_terms in sw_sentence + [ + [tag2id_mapping[tag] if tag in tag2id_mapping else tag2id_mapping["O"] for tag in tags + ["O"] * max_depth][ + :max_depth ] + for _, tags, tags_terms in sw_sentence + ] ).T print(err) + tags_terms = np.asarray( [ [tag_term for tag_term in tags_terms + ["O"] * max_depth][ @@ -393,3 +457,37 @@ def convert_to_sub_words(word_tokens, tags, tags_terms, tokenizer=None): sw_sentence.append([token] + [tags[token_idx], tags_terms[token_idx]]) subword_offset_mapping[token_idx] = token_idx return sw_sentence, subword_offset_mapping, subwords, valid_starts + +def convert_to_sub_words_lstm(word_tokens, tags, tags_terms, tokenizer=None): + subword_pos = 0 + subword_offset_mapping = {} + subwords = [] + sw_sentence = [] + + valid_starts = {0} + + for token_idx, token in enumerate(word_tokens): + if tokenizer: + # subtokens = tokenizer.tokenize(token) + subtokens = [token] + if subtokens: + sw_sentence.append(subtokens[:1] + [tags[token_idx], tags_terms[token_idx]]) + subword_offset_mapping[subword_pos] = token_idx + subword_pos += 1 + # subwords.append(subtokens[:1][0]) + subwords.append(subtokens[0]) + + labels = [re.sub("^B-", "I-", label) for label in tags[token_idx]] + ids = [re.sub("^B-", "I-", _id) for _id in tags_terms[token_idx]] + + for subtoken in subtokens[1:]: + sw_sentence.append([subtoken] + [labels, ids]) + subword_offset_mapping[subword_pos] = token_idx + subword_pos += 1 + subwords.append(subtoken) + + valid_starts.add(len(subwords)) + else: + sw_sentence.append([token] + [tags[token_idx], tags_terms[token_idx]]) + subword_offset_mapping[token_idx] = token_idx + return sw_sentence, subword_offset_mapping, subwords, 
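# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. When a word is
# split into subwords, only the first piece keeps its gold tag; continuation
# pieces have B- demoted to I- (the re.sub above), so no span can "restart"
# in the middle of a word. The toy tag is an assumption.
import re
_word_tags = ['B-Phosphorylation']               # tags of 'phosphorylation'
_cont_tags = [re.sub('^B-', 'I-', t) for t in _word_tags]
assert _cont_tags == ['I-Phosphorylation']       # for '##oryl', '##ation'
# ---------------------------------------------------------------------------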
valid_starts diff --git a/loader/prepData/event.py b/loader/prepData/event.py new file mode 100644 index 0000000..4790ed1 --- /dev/null +++ b/loader/prepData/event.py @@ -0,0 +1,270 @@ +"""Process events.""" + +import collections +from collections import OrderedDict + + +def count_nested_events(events): + for pmid, fevents in events.items(): + count_nested_evs_level(fevents['data']) + + +def count_nested_evs_level(fevents): + for evid, evdata in fevents.items(): + level = count_nested_ev_level(evdata, fevents, current_level=0) + evdata['nested_ev_level'] = level + + +def count_nested_ev_level(evdata, fevents, current_level): + """Nested event level""" + + # to avoid loop forever + if current_level > 20: + return current_level + + # flat + if evdata['is_flat_ev']: + return current_level + + # nested + elif 'nested_ev_level' in evdata: + return current_level + evdata['nested_ev_level'] + + else: + levels = [] + args_ids = evdata['args_ids'] + for arg_id in args_ids: + if arg_id.startswith('E'): + arg_evdata = fevents[arg_id] + if arg_evdata['is_flat_ev']: + levels.append(current_level + 1) + else: + arg_level = count_nested_ev_level(arg_evdata, fevents, current_level + 1) + levels.append(arg_level) + + level = max(levels) + return level + + +def extract_events(events0, entities1): + """Extract event data""" + + nflat = 0 + n1nested = 0 + nevents = 0 + + events1 = OrderedDict() + for pmid in events0: + events = events0[pmid]['data'] + idsE = events0[pmid]['ids'] + entities = entities1['pmids'][pmid]['data'] + ev2_ = OrderedDict() + + nevents += len(idsE) + + # Read event data + for idE in events: + event = events[idE] + args_data = event['args_data'] + + nestedE = OrderedDict() + if event['is_nested_ev']: + n1nested += 1 + for idnE in event['nested_events']: + nE = events[idnE] + nestedE[idnE] = nE + else: + nflat += 1 + + event['nested_events_info'] = nestedE + + argTypes = [] + argEntities = [] + if event['args_num'] > 0: + for arg in args_data: + typeR = arg[0] + typeR = ''.join([i for i in typeR if not i.isdigit()]) + eid = arg[1] + if eid in entities: + typeT = entities[eid]['type'] + typeArg = typeR + '->' + typeT + eArg = typeR + '->' + eid + else: + typeT = 'E' + typeArg = (typeR, typeT) + eArg = (typeR, eid) + argTypes.append(typeArg) + argEntities.append(eArg) + + event['args_types'] = argTypes + event['args_entities'] = argEntities + ev2_[idE] = event + + # Process nested events + for idE in ev2_: + event = ev2_[idE] + nestedE2 = False + if event['is_nested_ev']: + argsTypes = event['args_types'] + argsTypes2 = [] + for xx, arg in enumerate(event['args_data']): + typeR = arg[0] + eid = arg[1] + typeArg = argsTypes[xx] + if eid not in entities: + nEvent = events[eid] + if nEvent['is_nested_ev']: + typeArg = (typeArg[0], 'nestedEV') + nestedE2 = True + else: + if nEvent['is_zeroArg']: + typenEvent = ('Nested1', nEvent['trtype'], ['None']) + else: + typenEvent = ('Nested1', nEvent['trtype'], nEvent['args_types']) + + typeArg = (typeR, typenEvent) + + argsTypes2.append(typeArg) + + event['args_types'] = argsTypes2 + event['is_nested_ev_level2'] = nestedE2 + ev2_[idE] = event + + events1[pmid] = ev2_ + + evNums = OrderedDict() + evNums['ev_num'] = nevents + evNums['ev_flat'] = nflat + evNums['nested_level1'] = n1nested + + events3 = OrderedDict() + events3['pmids'] = events1 + events3['evNum'] = evNums + + return events3 + + +def string2pair(st): + """Parse line to event structure""" + + pairs = [] + + pairs0 = st.split('+') + for pair in pairs0: + if '0' in pair: + pair0 = 
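# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. The recursion in
# count_nested_ev_level above bottoms out at flat events and otherwise takes
# the deepest event argument, with the same depth guard against cycles; the
# toy event table is an assumption.
_events = {
    'E1': {'args_ids': ['T1'], 'is_flat_ev': True},
    'E2': {'args_ids': ['E1'], 'is_flat_ev': False},
    'E3': {'args_ids': ['E2', 'T2'], 'is_flat_ev': False},
}
def _level(ev_id, depth=0):
    if _events[ev_id]['is_flat_ev'] or depth > 20:
        return depth
    return max(_level(a, depth + 1)
               for a in _events[ev_id]['args_ids'] if a.startswith('E'))
assert _level('E1') == 0 and _level('E3') == 2
# ---------------------------------------------------------------------------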
pair.split('0') + pairs.append(pair0) + elif '1' in pair: + pair0 = pair.split('1') + pairs.append(pair0) + elif '2' in pair: + pair0 = pair.split('2') + pairs.append(pair0) + elif '3' in pair: + pair0 = pair.split('3') + pairs.append(pair0) + + return pairs + + +def count_structures(structs0): + """Event structure""" + + for typeTR, structs in structs0.items(): + + # store structures by each trigger type + structs_counts = dict(collections.Counter(structs)) + structs_data = OrderedDict() + + for struct, count in structs_counts.items(): + pairs = string2pair(struct) + # structs_data[struct] = [pairs, count] + structs_data[struct] = [pairs] + + # store structure data + structs0[typeTR] = structs_data + + return structs0 + + +def extract_trigger_structures(events1, entities1): + """Event structure by trigger type""" + + structs0 = collections.defaultdict(list) + structs1 = collections.defaultdict(list) + + n_events = 0 + n_1events = 0 + + for pmid in events1['pmids']: + events = events1['pmids'][pmid] + entities = entities1['pmids'][pmid]['data'] + + for idE in events: + event = events[idE] + trtype = event['trtype'] + args_data = event['args_data'] + + n_events += 1 + + # nested event + if event['is_nested_ev']: + n_1events += 1 + trtype = event['trtype'] + args_data = event['args_data'] + + args_type = '' + for pair in args_data: + if len(pair) > 0: + typeR = pair[0] + + # event argument + A2 = pair[1] + + # argument is entity: flat + if A2 in entities: + typeA2 = entities[A2]['type'] + type1 = typeR + '0' + typeA2 + + # argument is event: nested + else: + typeA2 = events[A2]['trtype'] + type1 = typeR + '1' + typeA2 + + else: + type1 = 'None' + '0' + trtype + if len(args_type) > 0: + args_type += '+' + args_type += type1 + event['args_type'] = args_type + + structs1[trtype].append(args_type) + + # flat event + else: + args_type = '' + for pair in args_data: + if len(pair) > 0: + typeR = pair[0] + if pair[1] not in entities: + print(pmid, pair[1]) + continue + typeT = entities[pair[1]]['type'] + type1 = typeR + '0' + typeT + else: + type1 = 'None' + '0' + trtype + if len(args_type) > 0: + args_type += '+' + args_type += type1 + + event['args_type'] = args_type + + structs0[trtype].append(args_type) + + structs0 = count_structures(structs0) + structs1 = count_structures(structs1) + + print('events: ', n_events, ' flat events: ', (n_events - n_1events)) + print('nested: ', n_1events) + + return {'structs0': structs0, 'structs1': structs1}, events1 diff --git a/loader/prepData/prepdata.py b/loader/prepData/prepdata.py index 377a42f..cb5a6f0 100644 --- a/loader/prepData/prepdata.py +++ b/loader/prepData/prepdata.py @@ -1,27 +1,43 @@ -"""Load data from brat format and process for entity""" - -from collections import OrderedDict +"""Load data from brat format and process for entity, trigger, relation, events.""" from loader.prepData.brat import brat_loader from loader.prepData.sentence import prep_sentence_offsets, process_input from loader.prepData.entity import process_etypes, process_tags, process_entities +from loader.prepData.event import extract_events, count_nested_events, extract_trigger_structures def prep_input_data(files_fold, params): # load data from *.ann files - entities0, sentences0 = brat_loader(files_fold, params) + triggers0, entities0, relations0, events0, sentences0 = brat_loader(files_fold, params) # sentence offsets sentences1 = prep_sentence_offsets(sentences0) + if 'pipeline_text_data' in params: + sent_words = [] + for pmid in sentences0: + doc_data = 
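# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. A structure
# string such as 'Theme0Protein+Cause1Regulation' (assumed example) encodes
# (role, target-type) pairs joined by '+', where the digit separator marks
# the argument kind (0 = entity, 1 = event); this is what string2pair above
# recovers.
_st = 'Theme0Protein+Cause1Regulation'
_pairs = []
for _chunk in _st.split('+'):
    for _sep in '0123':
        if _sep in _chunk:
            _pairs.append(_chunk.split(_sep))
            break
assert _pairs == [['Theme', 'Protein'], ['Cause', 'Regulation']]
# ---------------------------------------------------------------------------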
params['pipeline_text_data'][pmid] + for sent, pipe_sent in zip(sentences1['doc_data'][pmid], doc_data): + sent['words'] = pipe_sent['words'] + sent_words.append(sent['words']) + sent['offsets'] = pipe_sent['offsets'] + sentences1['sent_words'] = sent_words # entity entities1 = process_etypes(entities0) # all entity types - terms0 = process_tags(entities1) # terms, offset, tags, etypes - input0 = process_entities(entities1, sentences1, params, files_fold) + triggers1 = process_etypes(triggers0) # all trigger types + terms0 = process_tags(entities1, triggers1) # terms, offset, tags, etypes + input0 = process_entities(entities1, triggers1, sentences1, params, files_fold) + + # event + count_nested_events(events0) + events1 = extract_events(events0, entities1) + structsTR, events2 = extract_trigger_structures(events1, entities1) # prepare for training batch data for each sentence - input1 = process_input(input0) + input1 = process_input(input0, entities0, relations0, events2, params, files_fold) + # + print("Missing gold entities:") for doc_name, doc in sorted(input0.items(), key=lambda x: x[0]): entities = set() num_entities_per_doc = 0 @@ -35,18 +51,5 @@ def prep_input_data(files_fold, params): if diff: print(doc_name, sorted(diff, key=lambda _id: int(_id.replace("T", "")))) - # entity indices - g_entity_ids_ = OrderedDict() - for fid, fdata in entities0.items(): - # get max entity id - eid_ = [eid for eid in fdata['ids'] if not eid.startswith('TR')] - ids_ = [int(eid.replace('T', '')) for eid in eid_] - if len(ids_) > 0: - max_id = max(ids_) - else: - max_id = 0 - eid_.append(max_id) - g_entity_ids_[fid] = eid_ - - return {'entities': entities1, 'terms': terms0, 'sentences': sentences1, 'input': input1, - 'g_entity_ids_': g_entity_ids_} + return {'entities': entities1, 'triggers': triggers1, 'terms': terms0, 'relations': relations0, 'events': events0, + 'sentences': sentences1, 'input': input1, 'structsTR': structsTR} diff --git a/loader/prepData/relation.py b/loader/prepData/relation.py new file mode 100644 index 0000000..12c2603 --- /dev/null +++ b/loader/prepData/relation.py @@ -0,0 +1,118 @@ +"""Process relation information.""" + +from collections import OrderedDict + + +def process_relations(readable_entsA, readable_entsB, readable_ents, true_relations, unk, params): + r_idxs = OrderedDict() + readable_rels = OrderedDict() + + for e1, ent1 in enumerate(readable_entsA): # ent1 is A + if ent1 not in r_idxs: + r_idxs[ent1] = list(readable_ents.keys()).index( + ent1) # find to which index corresponds from all entities + for e2, ent2 in enumerate(readable_entsB): + if ent2 not in r_idxs: # ent2 is B + r_idxs[ent2] = list(readable_ents.keys()).index(ent2) + + if (ent1, ent2) not in readable_rels: + readable_rels[(ent1, ent2)] = [] + if (ent2, ent1) not in readable_rels: + readable_rels[(ent2, ent1)] = [] + + # A before B (in text) + Apos = readable_ents[ent1]['pos2'] + Bpos = readable_ents[ent2]['pos1'] + + # if readable_ents[ent1][4][-1] <= readable_ents[ent2][4][0]: + if Apos <= Bpos: + pref_f = '' + pref_b = '_INV' + arg1 = ent1 + arg2 = ent2 + # B before A (in text) + else: + pref_f = '_INV' + pref_b = '' + arg1 = ent2 + arg2 = ent1 + + Fpair = [('Arg1', arg1), ('Arg2', arg2)] # forward + Rpair = [('Arg1', arg2), ('Arg2', arg1)] # reverse + + total_rels = len(true_relations) + not_found = 0 + for rel in true_relations: # existing relations + + if rel[1] == 'Other': # in case negative relations are already labeled + # left-to-right + readable_rels[(arg1, arg2)] = (rel[0] + pref_f, 
'1:Other:2') + # right-to-left + if params['direction'] != 'l2r': + readable_rels[(arg2, arg1)] = (rel[0] + pref_b, '1:Other:2') + + # AB existing relation + if Fpair == true_relations[rel]: + # left-to-right + if len(readable_rels[(arg1, arg2)]) == 0: + readable_rels[(arg1, arg2)] = (rel[0] + pref_f, '1:' + rel[1] + ':2') + # right-to-left + if params['direction'] == 'neg': + readable_rels[(arg2, arg1)] = (rel[0] + pref_b, '1:Other:2') + elif params['direction'] == 'l2r+r2l': + if len(readable_rels[(arg2, arg1)]) == 0: + readable_rels[(arg2, arg1)] = (rel[0] + pref_b, '2:' + rel[1] + ':1') + # BA existing relation + elif Rpair == true_relations[rel]: + # left-to-right + if len(readable_rels[(arg1, arg2)]) == 0: + readable_rels[(arg1, arg2)] = (rel[0] + pref_f, '2:' + rel[1] + ':1') + # right-to-left + if params['direction'] == 'neg': + readable_rels[(arg2, arg1)] = (rel[0] + pref_b, '1:Other:2') + elif params['direction'] == 'l2r+r2l': + if len(readable_rels[(arg2, arg1)]) == 0: + readable_rels[(arg2, arg1)] = (rel[0] + pref_b, '1:' + rel[1] + ':2') + else: + not_found += 1 + + # this pair does not have a relation + if not_found == total_rels: + if readable_rels[(arg1, arg2)] or readable_rels[ + (arg2, arg1)]: # if pair already there, don't do anything + continue + + rel_new_id = 'R-' + str(unk) + + # left-to-right + readable_rels[(arg1, arg2)] = (rel_new_id + pref_f, '1:Other:2') + # right-to-left + if params['direction'] != 'l2r' and (ent1 != ent2): + readable_rels[(arg2, arg1)] = (rel_new_id + pref_b, '1:Other:2') + unk += 1 + + return r_idxs, readable_rels + + +def get_rtypes(data_struct, data_struct_dev): + rel_len = [] + rels = [] + for sid in data_struct['input']: + sent = data_struct['input'][sid] + rels2 = [] + for (e1, e2) in sent['readable_r']: + if sent['readable_r'][(e1, e2)]: + rels2.append(sent['readable_r'][(e1, e2)][1]) + rels.append(rels2) + rel_len.append(len(rels2)) + + for sid in data_struct_dev['input']: + sent = data_struct_dev['input'][sid] + rels2 = [] + for (e1, e2) in sent['readable_r']: + if sent['readable_r'][(e1, e2)]: + rels2.append(sent['readable_r'][(e1, e2)][1]) + rels.append(rels2) + rel_len.append(len(rels2)) + + return rels diff --git a/loader/prepData/sentence.py b/loader/prepData/sentence.py index ee59a2f..b177e94 100644 --- a/loader/prepData/sentence.py +++ b/loader/prepData/sentence.py @@ -4,6 +4,8 @@ from collections import OrderedDict import numpy as np +from loader.prepData.relation import process_relations + def calculate_offset(sentences, i): """ @@ -33,6 +35,7 @@ def prep_sentence_offsets(sentences0): sentences_ = [] sent_words = [] words_ = [] + # chars_ = [] sentences1 = OrderedDict() sent_lens = [] for pmid in sentences0: @@ -42,14 +45,18 @@ def prep_sentence_offsets(sentences0): doc_data = [] for xx, sentence in enumerate(sentences): offsets, words = calculate_offset(sentences, xx) + # chars = ["".join([w for w in words])] + # chars2 = [[c for c in w] for w in words] sent_lens.append(len(words)) sent_words.append(words) words_.extend(words) + # chars_.extend(chars) doc_data.append({ 'sentence': sentence, 'words': words, + # 'chars': chars2, 'offsets': offsets }) @@ -62,42 +69,130 @@ def prep_sentence_offsets(sentences0): sentences2['sentences'] = sentences_ sentences2['sent_words'] = sent_words sentences2['words'] = words_ + # sentences2['chars'] = chars_ sentences2['max_sent_len'] = max_sent_len return sentences2 -def process_input(input0): +def process_input(input0, entities0, relations0, events2, params, dirpath): + emissed = 0 + for 
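# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. Candidate pairs
# above are labelled in both directions: surface order in the text fixes the
# 'forward' pair, and the reverse copy gets an _INV suffix on the relation
# id. The offsets and ids here are assumptions.
_a_end, _b_start = 120, 145                # ent1 ends before ent2 starts
_pref_f, _pref_b = ('', '_INV') if _a_end <= _b_start else ('_INV', '')
_readable_rels = {
    ('T1', 'T2'): ('R5' + _pref_f, '1:Theme:2'),   # left-to-right
    ('T2', 'T1'): ('R5' + _pref_b, '2:Theme:1'),   # right-to-left
}
# ---------------------------------------------------------------------------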
pmid in input0: sentences_data = input0[pmid] + relations_data = relations0[pmid]['data'] + events_data = events2['pmids'][pmid] + # events2_data = events2[pmid] + + # selected = [] + # abst_sents_rels = [] + unk = 0 + added_events = [] for sid, sentence in enumerate(sentences_data): eids = sentence['eids'] readable_ents = sentence['readable_ents'] + cand_pairs = OrderedDict() + for idR in relations_data: + + relation = relations_data[idR] + rol1 = 'Arg1' + rol2 = 'Arg2' + arg1 = relation['arg1id'] + arg2 = relation['arg2id'] + typeR = relation['type'] + idR = relation['id'] + p = (idR, typeR) + pair = [(rol1, arg1), (rol2, arg2)] + + if arg1 in eids and arg2 in eids: + # selected.append(p) + cand_pairs[p] = pair + + sentence['rels'] = cand_pairs + true_relations = cand_pairs + + # RELATIONS readable_entsA = OrderedDict() read_temp = OrderedDict() for ee1 in eids: if ee1.startswith('TR'): - readable_entsA[ee1] = readable_ents[ee1] + readable_entsA[ee1] = readable_ents[ee1] # triggers else: - read_temp[ee1] = readable_ents[ee1] - readable_entsB = OrderedDict() + read_temp[ee1] = readable_ents[ee1] # entities + readable_entsB = OrderedDict() # augment with triggers for trig-trig pairs readable_entsB.update(read_temp) readable_entsB.update(readable_entsA) - r_idxs = OrderedDict() + r_idxs, readable_rels = process_relations(readable_entsA, readable_entsB, readable_ents, true_relations, + unk, + params) + sentence['readable_r'] = readable_rels sentence['idx'] = r_idxs sent_evs = OrderedDict() + for idE in events_data: + event = events_data[idE] + idTR = event['trid'] + if event['args_num'] == 0: + if idTR in sentence['idx']: + event['rel'] = {} + sent_evs[idE] = event + else: + + args_data = event['args_data'] + isEvent = True + rels = OrderedDict() + for xx, arg1 in enumerate(args_data): + typeR = arg1[0] + idArg = arg1[1] + if idArg in events_data and idTR in sentence['idx']: + # argument is event + argEv = events_data[idArg] + idArg2 = argEv['trid'] + if (idTR, idArg2) in readable_rels: + rel_data = readable_rels[(idTR, idArg2)] + if typeR in rel_data[1]: + rels[(idTR, idArg2)] = [rel_data[0], typeR] + continue + else: + isEvent = False + break + else: + isEvent = False + break + + elif (idTR, idArg) in readable_rels: + rel_data = readable_rels[(idTR, idArg)] + if typeR in rel_data[1]: + rels[(idTR, idArg)] = [rel_data[0], typeR] + continue + else: + isEvent = False + break + else: + isEvent = False + break + if isEvent: + event['rel'] = rels + sent_evs[idE] = event sentence['readable_ev'] = sent_evs trigger_ev = collections.defaultdict(list) + # idEvs = OrderedDict() + for idE in sent_evs: + event = sent_evs[idE] + idTR = event['trid'] + trigger_ev[idTR].append(event) sentence['trigger_ev'] = trigger_ev + # sentence['idEvs'] = idEvs + + added_events.extend([idE for idE in sent_evs]) input1 = OrderedDict() for pmid in input0: diff --git a/loader/prepNN/ent2net.py b/loader/prepNN/ent2net.py index b6bb403..0c44bcf 100644 --- a/loader/prepNN/ent2net.py +++ b/loader/prepNN/ent2net.py @@ -2,17 +2,8 @@ import collections -from loader.prepData.entity import extract_entities, convert_to_sub_words - - -def _elem2idx(list_of_elems, map_func): - """ - :param list_of_elems: list of lists - :param map_func: mapping dictionary - :returns - list with indexed elements - """ - return [[map_func[x] for x in list_of] for list_of in list_of_elems] +from loader.prepData.entity import extract_entities, convert_to_sub_words, convert_to_sub_words_lstm +from loader.prepNN.mapping import _elem2idx def 
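# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. process_input
# above keeps a gold relation as a sentence-level candidate only when both
# argument ids occur among that sentence's entity/trigger ids; the toy ids
# are assumptions.
_eids = {'TR7', 'T4'}
_relations = {'R1': ('Theme', 'TR7', 'T4'), 'R2': ('Theme', 'TR7', 'T9')}
_cand_pairs = {rid: rel for rid, rel in _relations.items()
               if rel[1] in _eids and rel[2] in _eids}
assert list(_cand_pairs) == ['R1']         # R2 crosses a sentence boundary
# ---------------------------------------------------------------------------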
entity2network(sentence_data, words, params, tokenizer): @@ -20,7 +11,21 @@ def entity2network(sentence_data, words, params, tokenizer): tags = sentence_data['tags'] tags_terms = sentence_data['tags_terms'] - sw_sentence, sub_to_word, subwords, valid_starts = convert_to_sub_words(words, + # nner: Using subwords: + if params['predict'] and params['pipelines']: + if params['pipe_flag'] > 0: + tokenizer = None + + # if use lstm + if params['use_lstm']: + sw_sentence, sub_to_word, subwords, valid_starts = convert_to_sub_words_lstm(words, + list(map(list, zip(*tags))), + list(map(list, zip(*tags_terms))), + tokenizer=tokenizer) + + # or bert + else: + sw_sentence, sub_to_word, subwords, valid_starts = convert_to_sub_words(words, list(map(list, zip(*tags))), list(map(list, zip(*tags_terms))), tokenizer=tokenizer) @@ -34,15 +39,21 @@ def entity2network(sentence_data, words, params, tokenizer): tagsIDs = list(map(list, zip(*tagsIDs))) tagsT = [] + tagsTR = [] for tag in tagsIDs: - tagsT.append(tag) + if tag[0] in params['trTags_Ids']: + tagsTR.append(tag) + else: + tagsT.append(tag) readable_e = sentence_data['readable_ents'] idxs = sentence_data['idx'] rev_idxs = {id: ent for ent, id in idxs.items()} toks2 = [] etypes2 = [] + # ents = OrderedDict() ents = collections.defaultdict(list) + # dup_ent_tag = False for xx in range(0, len(idxs)): ent = rev_idxs[xx] @@ -57,6 +68,8 @@ def entity2network(sentence_data, words, params, tokenizer): toksid = toks[0] ents[toksid].append([ent, readable_e[ent]['offs2'], readable_e[ent]['text']]) + # fix bug for mlee + # etypes2ids = [params['mappings']['type_map'][etype] if etype in params['mappings']['type_map'] else params['mappings']['type_map']['Metabolism'] for etype in etypes2] etypes2ids = [params['mappings']['type_map'][etype] for etype in etypes2] - return readable_e, idxs, ents, toks2, etypes2ids, entities, sw_sentence, sub_to_word, subwords, valid_starts, tagsIDs, terms + return readable_e, idxs, ents, toks2, etypes2ids, entities, sw_sentence, sub_to_word, subwords, valid_starts, tagsIDs, tagsTR, terms diff --git a/loader/prepNN/ev2net.py b/loader/prepNN/ev2net.py new file mode 100644 index 0000000..10cfd12 --- /dev/null +++ b/loader/prepNN/ev2net.py @@ -0,0 +1,306 @@ +"""Prepare event data for training networks.""" + +import collections +import numpy as np +import logging + +logger = logging.getLogger(__name__) + + +def create_ev_truth(idxs, readable_e, events, params): + ev_num = 0 + ev_matched = 0 + max_ev_per_layer = 0 + + truth_ev = -1 * np.ones((len(readable_e), 2), dtype=np.object) + + truth_ev0 = collections.defaultdict(list) + type_debug = False + + for idTR in events: + xxTR = idxs[idTR] + + ev_l_0 = 0 + ev_l_1 = 0 + + for event in events[idTR]: + event['modality'] = params['mappings']['modality_map'][event['modality']] + ev_num += 1 + + typeTR = event['trtype'] + + struct = event['args_type'] + + if typeTR in params['mappings']['flat_structs_map']: + if struct in params['mappings']['flat_structs_map'][typeTR]: + ev_matched += 1 + ev_l_0 += 1 + ev_argtype = event['args_type'] + args_ids = params['mappings']['flat_structs_map'][typeTR][ev_argtype] + + rels = event['rel'] + a2ids = [] + for rel, reltype in rels.items(): + a2id = idxs[rel[1]] + a2ids.append(a2id) + + # self event + + if event['is_zeroArg']: + a2ids.append(xxTR) + + truth_ev0[(xxTR, 0)].append([args_ids, a2ids]) + + if typeTR in params['mappings']['nested_structs_map']: + if struct in params['mappings']['nested_structs_map'][typeTR]: + ev_matched += 1 + ev_l_1 += 1 + ev_argtype = 
event['args_type'] + args_ids = params['mappings']['nested_structs_map'][typeTR][ev_argtype] + + rels = event['rel'] + a2ids = [] + for rel, reltype in rels.items(): + a2id = idxs[rel[1]] + a2ids.append(a2id) + + truth_ev0[(xxTR, 1)].append([args_ids, a2ids]) + + max_ev_per_layer = max(ev_l_0, ev_l_1, max_ev_per_layer) + + for trid, pairs in truth_ev0.items(): + truth_ev[trid[0]][trid[1]] = pairs + + no_event = False + if len(truth_ev0) == 0: + no_event = True + + ev_missed = ev_num - ev_matched + + return truth_ev, ev_num, ev_matched, ev_missed, no_event, type_debug, max_ev_per_layer + + +def event2network(sentence_data, fid, idxs, events_map, max_ev_per_layer, readable_e, params): + # input + events = sentence_data['trigger_ev'] + + # create labels for events + truth_ev, ev_num, ev_matched, ev_missed, no_event, type_debug, max_ev_per_layer_ = create_ev_truth(idxs, + readable_e, + events, params) + + # C2T add + max_ev_per_layer = max(max_ev_per_layer_, max_ev_per_layer) + + # ev_num2 += ev_num + # ev_matched2 += ev_matched + # ev_missed2 += ev_missed + + # Add events to map: + for _, events_list in enumerate(events.items()): + for event in events_list[1]: + if fid not in events_map: + events_map[fid] = {event['id']: event} + else: + events_map[fid][event['id']] = event + + return events, truth_ev, max_ev_per_layer + + +def count_ev_truth(samples): + """Count the number of created truth events.""" + + # count total number of valid truth events + total_count_valid_evs = 0 + + # for each sentence + for sample in samples: + # get truth + truth_ev = sample['truth_ev'] + + # count the valid event truth + valid_truth_ev = truth_ev[truth_ev != -1] + count_valid_ev = sum([len(truth_list) for truth_list in valid_truth_ev]) + total_count_valid_evs += count_valid_ev + + print('Check created event truth') + print('Valid truth events: ', total_count_valid_evs) + + return + + +def gen_nn_truth_nested_ev(fid, typeTR, struct, mapping_structs, event, span_terms, ev_idx, events_map, params, + self_event=False): + try: + if typeTR in mapping_structs: + if struct in mapping_structs[typeTR]: + ev_argtype = event['args_type'] + args_ids = mapping_structs[typeTR][ev_argtype] + rels = event['rel'] + + # store entity arguments and event arguments + ent_args = [] + ev_args = [] + + if len(event['nested_events']) > 0: + nested_evs = [events_map[fid][eid] if eid in events_map[fid] else -1 for eid in + event['nested_events']] + nested_trIds = [ev['trid'] if ev != -1 else -1 for ev in nested_evs] + for rel, reltype in rels.items(): + argid = rel[1] + + # is event argument + if len(event['nested_events']) > 0: + + # is trigger + if argid in nested_trIds: + nested_ev = nested_evs[nested_trIds.index(argid)] + a2id = gen_nn_truth_nested_evs(fid, nested_ev, span_terms, events_map, params) + ev_args.append(a2id) + + # is entity + else: + a2id = span_terms.term2id[argid] + ent_args.append(a2id) + + # or flat + else: + a2id = span_terms.term2id[argid] + ent_args.append(a2id) + + # self event + if self_event: + if event['is_zeroArg']: + ent_args.append(ev_idx) + + if len(ent_args) > 0: + ent_args = collections.Counter(ent_args) + + nested_ev_level = event['nested_ev_level'] + truth_out = (nested_ev_level, args_ids, ent_args, ev_args) + return truth_out + + except (KeyError, ValueError) as err: + logger.debug(err) + return None + + +def gen_nn_truth_nested_evs(fid, nested_ev, span_terms, events_map, params): + nested_idTR = nested_ev['trid'] + nested_ev_idx = span_terms.term2id[nested_idTR] + typeTR = nested_ev['trtype'] + 
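# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. truth_ev above
# is a ragged structure held in a NumPy object array: one row per trigger
# candidate, column 0 for flat-event truths, column 1 for nested ones, and
# -1 wherever no gold event exists. The toy contents are assumptions.
import numpy as np
_truth_ev = -1 * np.ones((3, 2), dtype=object)
_truth_ev[0][0] = [[[7], [2, 5]]]          # [structure ids, argument ids]
# ---------------------------------------------------------------------------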
struct = nested_ev['args_type'] + nested_ev_present = gen_nn_truth_nested_ev(fid, typeTR, struct, params['mappings']['flat_structs_map'], nested_ev, + span_terms, nested_ev_idx, events_map, params, self_event=True) + if not nested_ev_present: + nested_ev_present = gen_nn_truth_nested_ev(fid, typeTR, struct, params['mappings']['nested_structs_map'], + nested_ev, + span_terms, nested_ev_idx, events_map, params) + + return nested_ev_present + + +def gen_nn_truth_ev(fid, truth_ev_layer, typeTR, struct, mapping_structs, event, span_terms, ev_idx, events_map, params, + self_event=False): + try: + if typeTR in mapping_structs: + if struct in mapping_structs[typeTR]: + ev_argtype = event['args_type'] + struct_ids = mapping_structs[typeTR][ev_argtype] + rels = event['rel'] + + # store entity and event arguments + ent_args_list = [] + ev_args_list = [] + + if len(event['nested_events']) > 0: + nested_evs = [events_map[fid][eid] if eid in events_map[fid] else -1 for eid in + event['nested_events']] + nested_trIds = [ev['trid'] if ev != -1 else -1 for ev in nested_evs] + for rel, reltype in rels.items(): + argid = rel[1] + if len(event['nested_events']) > 0: + + # is trigger + if argid in nested_trIds: + nested_ev = nested_evs[nested_trIds.index(argid)] + a2id = gen_nn_truth_nested_evs(fid, nested_ev, span_terms, events_map, params) + ev_args_list.append(a2id) + + # is entity + else: + a2id = span_terms.term2id[argid] + ent_args_list.append(a2id) + + # is flat + else: + a2id = span_terms.term2id[argid] + ent_args_list.append(a2id) + + # self event + if self_event: + if event['is_zeroArg']: + ent_args_list.append(ev_idx) + + mod_label = event['modality'] + if len(ent_args_list) > 0: + ent_args_list = collections.Counter(ent_args_list) + truth_out = [(struct_ids, ent_args_list, ev_args_list), mod_label] + truth_ev_layer.append(truth_out) + + except (KeyError, ValueError) as err: + logger.debug(err) + + +def gen_nn_truth_evs(fid, span_terms, events, events_map, params): + truth_ev = -1 * np.ones((len(events), params['max_ev_level'] + 1, params['max_ev_args'] + 1), dtype=np.object) + ev_lbls = -1 * np.ones((len(events)), dtype=np.object) + ev_idxs = {} + + truth_ev_dict = collections.defaultdict(list) + ev_lbls_dict = collections.defaultdict(list) + + # store list of events for each trigger id + ev_idxs_lst = [] + + for idTR in events: + if idTR in span_terms.term2id: + + # event trigger index + ev_trid = span_terms.term2id[idTR] + + ev_idxs_lst.append(ev_trid) + for i, event in enumerate(events[idTR]): + mod_label = event['modality'] + typeTR = event['trtype'] + struct = event['args_type'] + + # get the number of arguments, and nested level + arg_num = event['args_num'] + nested_ev_level = event['nested_ev_level'] + + # flat events + gen_nn_truth_ev(fid, truth_ev_dict[(ev_trid, 0, arg_num)], typeTR, struct, + params['mappings']['flat_structs_map'], event, + span_terms, ev_trid, events_map, params, self_event=True) + + # nested events + gen_nn_truth_ev(fid, truth_ev_dict[(ev_trid, nested_ev_level, arg_num)], typeTR, struct, + params['mappings']['nested_structs_map'], event, + span_terms, ev_trid, events_map, params) + ev_lbls_dict[ev_trid].append(mod_label) + + for i, ev_trid in enumerate(ev_idxs_lst): + ev_idxs[ev_trid] = i + ev_lbls[i] = ev_lbls_dict[ev_trid] + for level in range(params['max_ev_level'] + 1): + for narg in range(params['max_ev_args'] + 1): + try: + if len(truth_ev_dict[(ev_trid, level, narg)]) > 0: + truth_ev[i][level][narg] = truth_ev_dict[(ev_trid, level, narg)] + else: + 
truth_ev[i][level][narg] = -1 + except KeyError: + truth_ev[i][level][narg] = -1 + + return truth_ev, ev_idxs, ev_lbls diff --git a/loader/prepNN/mapping.py b/loader/prepNN/mapping.py new file mode 100644 index 0000000..0d32e3b --- /dev/null +++ b/loader/prepNN/mapping.py @@ -0,0 +1,252 @@ +"""Generate mappings""" + +import itertools +from collections import OrderedDict +from collections import Counter +import numpy as np + +from loader.prepData.entity import entity_tags +from loader.prepData.relation import get_rtypes +from loader.prepNN.structure import process_structure + + +def _generate_mapping(list_of_elems): + """ + :param list_of_elems: list of elements (single or nested) + :returns + dictionary with a unique id for each element + """ + # list of lists + elem_count = OrderedDict() + if all(isinstance(el, list) for el in list_of_elems): + for item in itertools.chain.from_iterable(list_of_elems): + if item not in elem_count: + elem_count[item] = 1 + else: + elem_count[item] += 1 + # single lists + else: + for item in list_of_elems: + if item not in elem_count: + elem_count[item] = 1 + else: + elem_count[item] += 1 + elem_count = sorted(elem_count.items(), key=lambda x: x[1]) # sort from low to high freq + mapping = OrderedDict([(elem, i) for i, (elem, val) in enumerate(elem_count)]) + rev_mapping = OrderedDict([(v, k) for k, v in mapping.items()]) + return mapping, rev_mapping, len(elem_count) + + +def _find_singletons(list_of_elems, args, min_w_freq): + """ + :param list_of_elems: list of all words in a train dataset + :returns + number of words with frequency = 1 + """ + elem_count = Counter([x for x in list_of_elems]) + unique_args = list(set(itertools.chain.from_iterable([a.split(' ') for a in args]))) + singles = [elem for elem, val in elem_count.items() if ((val <= min_w_freq) and (elem not in unique_args))] + return singles + + +def generate_map(data_struct, data_struct_dev, data_struct_test, params): # add test for mlee + + # 1. words mapping + words = data_struct['sentences']['sent_words'] + words_train = data_struct['sentences']['words'] + words.append(['']) + word_map, rev_word_map, word_size = _generate_mapping(words) + + # 2. .. 
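# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. _generate_mapping
# above sorts items from low to high frequency before assigning ids, so the
# most frequent item receives the highest id; the toy vocabulary is an
# assumption.
import itertools
from collections import Counter, OrderedDict
_sents = [['binding', 'of', 'protein'], ['binding', 'site']]
_counts = Counter(itertools.chain.from_iterable(_sents))
_ranked = sorted(_counts.items(), key=lambda kv: kv[1])    # low -> high freq
_word_map = OrderedDict((w, i) for i, (w, _) in enumerate(_ranked))
assert _word_map['binding'] == len(_word_map) - 1
# ---------------------------------------------------------------------------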
+ # labels of entity (in .a1) + argumentsT = data_struct['entities']['arguments'] + + # labels of trigger (in .a2) + argumentsTR = data_struct['triggers']['arguments'] + arguments = argumentsT + argumentsTR + singlesW = _find_singletons(words_train, arguments, params['min_w_freq']) + + typesTR = data_struct['terms']['typesTR'] + typesTR.extend(data_struct_dev['terms']['typesTR']) + + typesT = data_struct['terms']['typesT'] + typesT.extend(data_struct_dev['terms']['typesT']) + + # add for test: fig bug for mlee + typesTR.extend(data_struct_test['terms']['typesTR']) + typesT.extend(data_struct_test['terms']['typesT']) + + all_types = [] + for type in typesTR: + if type not in all_types: + all_types.append(type) + + for type in typesT: + if type not in all_types: + all_types.append(type) + + type_map = {type: id for id, type in enumerate(all_types)} + rev_type_map = {id: type for type, id in type_map.items()} + type_size = len(type_map) + + typeTR_map = {} + for type, id in type_map.items(): + if type in typesTR: + typeTR_map[type] = id + rev_typeTR_map = {id: type for type, id in typeTR_map.items()} + # typeTR_size = len(typeTR_map) + + rev_tag_map, tag_map, _, _ = entity_tags(rev_type_map) + + tag_size = len(tag_map) + + trTypeIds = [id for id in rev_typeTR_map] + + tagsTR = data_struct['terms']['tagsTR'] + tagsTR2 = data_struct_dev['terms']['tagsTR'] + tagsTR.extend([tag for tag in tagsTR2 if tag not in tagsTR]) + rev_tag_mapTR = {tag_map[tag]: tag for tag in tagsTR} + + tag_mapTR = {tag: id for id, tag in rev_tag_mapTR.items()} + trTagsIds = [tag for tag in rev_tag_mapTR] + + tag2type = data_struct['terms']['tags2types'] + tag2type2 = data_struct_dev['terms']['tags2types'] + for tag in tag2type2: + if tag not in tag2type: + tag2type[tag] = tag2type2[tag] + tag2type_map = OrderedDict() + for tag in tag2type: + if tag != 'O': + type = tag2type[tag] + tag2type_map[tag_map[tag]] = type_map[type] + tag2type_map[0] = -1 # tag O + + tag2type = np.zeros(tag_size, np.int32) + for tag, type in tag2type_map.items(): + tag2type[tag] = type + + # 3. pos map + all_sents = data_struct['sentences']['sentences'] + all_sents.extend(data_struct_dev['sentences']['sentences']) + + length = [len([w for w in s.split()]) for s in all_sents] + ranges = [list(map(str, list(range(-l + 1, l)))) for l in length] + if params['include_nested']: + ranges.append(['inner']) # encode nestedness embeddings + ranges.append(['outer']) + pos_map, rev_pos_map, pos_size = _generate_mapping(ranges) + + # 4. 
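# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. The tag ids used
# here come from entity_tags (loader/prepData/entity.py), which interleaves
# I-/B- tags per type after O = 0; the toy types are assumptions.
_id_to_tag = {0: 'O'}
for _i, _t in enumerate(['Protein', 'Binding']):
    _id_to_tag[2 * _i + 1] = 'I-' + _t
    _id_to_tag[2 * _i + 2] = 'B-' + _t
assert _id_to_tag == {0: 'O', 1: 'I-Protein', 2: 'B-Protein',
                      3: 'I-Binding', 4: 'B-Binding'}
# ---------------------------------------------------------------------------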
rel map + rels = get_rtypes(data_struct, data_struct_dev) + rel_map, rev_rel_map, rel_size = _generate_mapping(rels) + + # Generate relation maps with L R distinguishing + rtype_map = {'Other': -1} + rel2rtype_map = {} + for rel in rel_map: + relid = rel_map[rel] + rtype = rel.split(':')[1] + if '1:' in rel and rtype != 'Other': # ony lef to right + rtype_map[rtype] = relid + + for rel in rel_map: + relid = rel_map[rel] + rtype = rel.split(':')[1] + rtypeid = rtype_map[rtype] + rel2rtype_map[relid] = rtypeid + + rel2rtype_map2 = np.zeros((len(rel2rtype_map)), dtype=np.int32) + for rel, rtype in rel2rtype_map.items(): + rel2rtype_map2[rel] = rtype + + rev_rtype_map = {id: type for type, id in rtype_map.items()} + # rev_rtype_map[rel_size] = 'None' # for the none relation in events + + # generate mappings for event structures + flat_structs_map, nested_structs_map, flat_types_id_map, nested_types_id_map, etype_pairs = process_structure( + data_struct, data_struct_dev, params, type_map, typeTR_map, rtype_map, type_size, rel_size) + + # modality + modality_map = {'non-modality': 1, 'Speculation': 2, 'Negation': 3} + rev_modality_map = {id: type for type, id in modality_map.items()} + ev_size = len(modality_map) + + # return + params['voc_sizes'] = {'word_size': word_size, + 'etype_size': type_size, + 'tag_size': tag_size, + 'pos_size': pos_size, + 'rel_size': rel_size, + 'ev_size': ev_size + } + params['mappings'] = {'word_map': word_map, 'rev_word_map': rev_word_map, + 'type_map': type_map, 'rev_type_map': rev_type_map, + 'typeTR_map': typeTR_map, 'rev_typeTR_map': rev_typeTR_map, + 'tag_map': tag_map, 'rev_tag_map': rev_tag_map, + 'tag_mapTR': tag_mapTR, 'rev_tag_mapTR': rev_tag_mapTR, + 'tag2type_map': tag2type, + 'pos_map': pos_map, 'rev_pos_map': rev_pos_map, + 'rel_map': rel_map, 'rev_rel_map': rev_rel_map, + 'rtype_map': rtype_map, 'rev_rtype_map': rev_rtype_map, + 'rel2rtype_map': rel2rtype_map2, + 'flat_structs_map': flat_structs_map, 'flat_types_id_map': flat_types_id_map, + 'nested_structs_map': nested_structs_map, 'nested_types_id_map': nested_types_id_map, + 'modality_map': modality_map, 'rev_modality_map': rev_modality_map, + 'etype_pairs': etype_pairs + } + params['trTags_Ids'] = trTagsIds + params['trTypes_Ids'] = trTypeIds + params['words_train'] = words_train + params['singletons'] = singlesW + params['max_sent_len'] = np.maximum(data_struct['sentences']['max_sent_len'], + data_struct_dev['sentences']['max_sent_len']) + params['rtype_trig_ev'] = rel_size + + return params + + +def find_ignore_label(params): + """ + :return: + id corresponds to the "Other" relation + dictionary with directionality, e.g. 
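# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. Relation labels
# carry their direction ('1:Rel:2' vs '2:Rel:1'); rtype_map above keeps only
# the left-to-right id per type, and rel2rtype_map collapses both directions
# onto it. The toy label ids are assumptions.
_rel_map = {'1:Other:2': 0, '1:Theme:2': 1, '2:Theme:1': 2}
_rtype_map = {'Other': -1}
for _rel, _rid in _rel_map.items():
    _rtype = _rel.split(':')[1]
    if _rel.startswith('1:') and _rtype != 'Other':
        _rtype_map[_rtype] = _rid
_rel2rtype = {_rid: _rtype_map[_rel.split(':')[1]]
              for _rel, _rid in _rel_map.items()}
assert _rel2rtype == {0: -1, 1: 1, 2: 1}
# ---------------------------------------------------------------------------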
relation_mapping['1:Rel:2'] = 3 + relation_mapping['2:Rel:1'] = 8 + lab_map[3] = 8, lab_map[8] = 3 + """ + lab2ign_id = params['mappings']['rel_map'][params['lab2ign']] + + # Map key of relation 1:REL:2 with 2:REL:1, else , map this key with itself, also map ignored keys + lab_map = OrderedDict() + for m, n in params['mappings']['rel_map'].items(): + for m2, n2 in params['mappings']['rel_map'].items(): + if m == m2: + continue + elif m == params['lab2ign'] or m2 == params['lab2ign']: + continue + elif m.split(':')[1] == m2.split(':')[1]: + lab_map[n] = n2 + + for m, n in params['mappings']['rel_map'].items(): + if n not in lab_map: + lab_map[n] = n + + lab_map[lab2ign_id] = lab2ign_id + params['lab_map'] = lab_map + params['lab2ign_id'] = lab2ign_id + return params + + +def _elem2idx(list_of_elems, map_func): + """ + :param list_of_elems: list of lists + :param map_func: mapping dictionary + :returns + list with indexed elements + """ + # fix bug for mlee + # return [[map_func[x] if x in map_func else map_func["O"] for x in list_of] for list_of in list_of_elems] + return [[map_func[x] for x in list_of] for list_of in list_of_elems] + + + diff --git a/loader/prepNN/prep4nn.py b/loader/prepNN/prep4nn.py index 75200c6..f488b6e 100644 --- a/loader/prepNN/prep4nn.py +++ b/loader/prepNN/prep4nn.py @@ -1,12 +1,15 @@ """Prepare data for training networks.""" +import collections from collections import OrderedDict from bert.tokenization import BertTokenizer from sklearn.preprocessing import MultiLabelBinarizer from loader.prepNN.sent2net import prep_sentences -from loader.prepNN.ent2net import entity2network, _elem2idx +from loader.prepNN.ent2net import entity2network +from loader.prepNN.ev2net import event2network +from loader.prepNN.mapping import _elem2idx from loader.prepNN.span4nn import get_nn_data @@ -21,12 +24,18 @@ def data2network(data_struct, data_type, params): all_sentences = [] + # C2T add: + max_ev_per_layer = params['max_ev_per_layer'] + # nner: Using subwords: tokenizer = BertTokenizer.from_pretrained( params['bert_model'], do_lower_case=False ) + events_map = collections.defaultdict() + for xx, sid in enumerate(data_struct['input']): + # input sentence_data = data_struct['input'][sid] @@ -34,13 +43,18 @@ def data2network(data_struct, data_type, params): fid = sid.split(':')[0] # words to ids + # words = sentence_data['words'] word_ids = wordsIDs[xx] words = org_sent_words[xx] # entity - readable_e, idxs, ents, toks2, etypes2ids, entities, sw_sentence, sub_to_word, subwords, valid_starts, tagsIDs, terms = entity2network( + readable_e, idxs, ents, toks2, etypes2ids, entities, sw_sentence, sub_to_word, subwords, valid_starts, tagsIDs, tagsTR, terms = entity2network( sentence_data, words, params, tokenizer) + # events + events, truth_ev, max_ev_per_layer = event2network(sentence_data, fid, idxs, events_map, max_ev_per_layer, + readable_e, params) + # return sentence_vector = OrderedDict() sentence_vector['fid'] = fid @@ -50,23 +64,43 @@ def data2network(data_struct, data_type, params): sentence_vector['offsets'] = sentence_data['offsets'] sentence_vector['e_ids'] = idxs sentence_vector['tags'] = tagsIDs + sentence_vector['tagsTR'] = tagsTR sentence_vector['etypes2'] = etypes2ids sentence_vector['toks2'] = toks2 sentence_vector['raw_words'] = sentence_data['words'] + sentence_vector['truth_ev'] = truth_ev + # nner sentence_vector['entities'] = entities sentence_vector['sw_sentence'] = sw_sentence sentence_vector['terms'] = terms + sentence_vector['relations'] = 
sentence_data['readable_r'] + sentence_vector['events'] = events sentence_vector['sub_to_word'] = sub_to_word sentence_vector['subwords'] = subwords sentence_vector['valid_starts'] = valid_starts - all_sentences.append(sentence_vector) + # ignore this sentence or not + ignore_sent = False + + # filter sentence with no entity, for training set only (contains 'train' in path) + if params['filter_no_ent_sents'] and data_type == 'train': - return all_sentences + # check number of entities in this sentence + ents_no = len(sentence_vector['e_ids']) + if ents_no == 0: + ignore_sent = True + if not ignore_sent: + all_sentences.append(sentence_vector) -def torch_data_2_network(cdata2network, params, do_get_nn_data): + # C2T add + params['max_ev_per_layer'] = max_ev_per_layer + + return all_sentences, events_map + + +def torch_data_2_network(cdata2network, events_map, params, do_get_nn_data): """ Convert object-type data to torch.tensor type data, aim to use with Pytorch """ etypes = [data['etypes2'] for data in cdata2network] @@ -76,6 +110,8 @@ def torch_data_2_network(cdata2network, params, do_get_nn_data): sw_sentences = [data['sw_sentence'] for data in cdata2network] termss = [data['terms'] for data in cdata2network] valid_startss = [data['valid_starts'] for data in cdata2network] + relationss = [data['relations'] for data in cdata2network] + eventss = [data['events'] for data in cdata2network] fids = [data['fid'] for data in cdata2network] wordss = [data['words'] for data in cdata2network] @@ -90,6 +126,7 @@ def torch_data_2_network(cdata2network, params, do_get_nn_data): # User-defined data if not params["predict"]: id_tag_mapping = params["mappings"]["nn_mapping"]["id_tag_mapping"] + trigger_ids = params["mappings"]["nn_mapping"]["trTypes_Ids"] mlb = MultiLabelBinarizer() mlb.fit([sorted(id_tag_mapping)[1:]]) # [1:] skip label O @@ -99,13 +136,23 @@ def torch_data_2_network(cdata2network, params, do_get_nn_data): params["max_span_width"] = max(params["max_entity_width"], params["max_trigger_width"]) + params["mappings"]["nn_mapping"]["full_labels"] = sorted([v for k, v in id_tag_mapping.items() if k > 0]) + params["mappings"]["nn_mapping"]["trigger_labels"] = sorted( + [v for k, v in id_tag_mapping.items() if k in trigger_ids]) + params["mappings"]["nn_mapping"]["num_triggers"] = len(params["mappings"]["nn_mapping"]["trigger_labels"]) params["mappings"]["nn_mapping"]["num_entities"] = params["mappings"]["nn_mapping"]["num_labels"] - \ params["mappings"]["nn_mapping"]["num_triggers"] if do_get_nn_data: - nn_data = get_nn_data(fids, entitiess, termss, valid_startss, sw_sentences, - tokenizer, params) + nn_data = get_nn_data(fids, entitiess, termss, valid_startss, relationss, eventss, sw_sentences, + tokenizer, events_map, + params) return {'nn_data': nn_data, 'etypes': etypes, 'fids': fids, 'words': wordss, 'offsets': offsetss, 'sub_to_words': sub_to_words, 'subwords': subwords, 'entities': entitiess} + else: + return {'termss': termss, 'relationss': relationss, 'eventss': eventss, 'sw_sentences': sw_sentences, + 'tokenizer': tokenizer, 'events_map': events_map, 'params': params, 'etypes': etypes, 'fids': fids, + 'words': wordss, 'offsets': offsetss, 'sub_to_words': sub_to_words, 'subwords': subwords, + 'entities': entitiess} diff --git a/loader/prepNN/rel2net.py b/loader/prepNN/rel2net.py new file mode 100644 index 0000000..0039357 --- /dev/null +++ b/loader/prepNN/rel2net.py @@ -0,0 +1,23 @@ +"""Prepare relation data for networks.""" + + +def gen_nn_rel_info(span_terms, relations, params): + 
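# ---------------------------------------------------------------------------
# NOTE (editor): illustrative sketch, not part of the patch. Because nested
# mentions can give one span several gold tags at once, span labels are
# multi-hot vectors, which is what the MultiLabelBinarizer fit above
# produces; the toy tag ids are assumptions.
from sklearn.preprocessing import MultiLabelBinarizer
_mlb = MultiLabelBinarizer()
_mlb.fit([[1, 2, 3, 4]])                   # every tag id except O (id 0)
print(_mlb.transform([[1, 3], []]))        # -> [[1 0 1 0]
                                           #     [0 0 0 0]]
# ---------------------------------------------------------------------------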
gtruth = {} + left = [] + right = [] + + for term_i, term_j in relations: + if term_i in span_terms.term2id and term_j in span_terms.term2id: + i = span_terms.term2id[term_i] + j = span_terms.term2id[term_j] + + rel_id = relations[(term_i, term_j)][0] + map_rel_type = params['mappings']['rel_map'][relations[(term_i, term_j)][1]] + params['statistics']['rel'][map_rel_type] = params['statistics']['rel'][map_rel_type] + 1 + gtruth[i, j] = map_rel_type + if ('_INV' not in rel_id) and (term_i != term_j): + # if it is inverse, take the index of the element + left.append(i) + right.append(j) + + return gtruth, (left, right) diff --git a/loader/prepNN/sent2net.py b/loader/prepNN/sent2net.py index 568736f..a268880 100644 --- a/loader/prepNN/sent2net.py +++ b/loader/prepNN/sent2net.py @@ -15,6 +15,7 @@ def prep_sentences(data_struct, data_type, params): # MAPPINGS singlesW = params['singletons'] words_train = params['words_train'] + # pre_words = params['pre_words'] uw_prob = params['unk_w_prob'] if data_type == 'train': singlesW = set(singlesW) @@ -40,6 +41,7 @@ def prep_sentences(data_struct, data_type, params): in_train = 0 in_pretrain = 0 nowhere = 0 + # pre_words = set(pre_words) words_train = set(words_train) for sid, s in enumerate(data_struct['sentences']): ff = [] @@ -47,7 +49,9 @@ def prep_sentences(data_struct, data_type, params): if w in words_train: in_train += 1 ff.append(w) - + # elif w in pre_words: + # in_pretrain += 1 + # ff.append(w) else: nowhere += 1 ff.append('') diff --git a/loader/prepNN/span4nn.py b/loader/prepNN/span4nn.py index e1e6e0e..ba2f0fb 100644 --- a/loader/prepNN/span4nn.py +++ b/loader/prepNN/span4nn.py @@ -3,8 +3,12 @@ import numpy as np from collections import namedtuple +from loader.prepNN.rel2net import gen_nn_rel_info +from loader.prepNN.ev2net import count_ev_truth, gen_nn_truth_evs + Term = namedtuple('Term', ['id2term', 'term2id', 'id2label']) + def get_span_index( span_start, span_end, @@ -30,8 +34,10 @@ def get_span_index( return span_index * limit + index -def get_batch_data(fid, entities, terms, valid_starts, sw_sentence, tokenizer, params): +def get_batch_data(fid, entities, terms, valid_starts, relations, events, sw_sentence, tokenizer, events_map, + params): mlb = params["mappings"]["nn_mapping"]["mlb"] + num_labels = params["mappings"]["nn_mapping"]["num_labels"] max_entity_width = params["max_entity_width"] max_trigger_width = params["max_trigger_width"] @@ -49,7 +55,14 @@ def get_batch_data(fid, entities, terms, valid_starts, sw_sentence, tokenizer, p tokens = tokens[:num_tokens] token_mask = token_mask[:num_tokens] - ids = tokenizer.convert_tokens_to_ids(["[CLS]"] + tokens + ["[SEP]"]) + # use lstm + if params['use_lstm']: + tokens = [""] + tokens + [""] + ids = [0] * len(tokens) + + # or bert + else: + ids = tokenizer.convert_tokens_to_ids(["[CLS]"] + tokens + ["[SEP]"]) token_mask = [0] + token_mask + [0] @@ -94,6 +107,8 @@ def get_batch_data(fid, entities, terms, valid_starts, sw_sentence, tokenizer, p if span_start not in valid_starts or (span_end + 1) not in valid_starts: # Ensure that there is no entity label here if not (params['predict'] and (params['pipelines'] and params['pipe_flag'] != 0)): + + # TODO: temporarily comment to fix bug, check again assert (span_start, span_end) not in entities entity_mask = 0 @@ -104,7 +119,13 @@ def get_batch_data(fid, entities, terms, valid_starts, sw_sentence, tokenizer, p if (span_start, span_end) in entities: span_label = entities[(span_start, span_end)] span_term = terms[(span_start, span_end)] + # 
check if term can create relation in gold + # for idx, term in enumerate(span_term): + # if term not in params['map_entities_without_relations']: + # span_label_match_rel = 1 + # break + # assert len(span_label) <= params["ner_label_limit"], "Found an entity having a lot of types" if len(span_label) > params["ner_label_limit"]: print('over limit span_label', span_term) @@ -113,6 +134,7 @@ def get_batch_data(fid, entities, terms, valid_starts, sw_sentence, tokenizer, p sorted(zip(span_label, span_term), reverse=True)[:params["ner_label_limit"]]): span_index = get_span_index(span_start, span_end, max_span_width, num_tokens, idx, params["ner_label_limit"]) + span_terms.id2term[span_index] = term_id span_terms.term2id[term_id] = span_index @@ -128,6 +150,12 @@ def get_batch_data(fid, entities, terms, valid_starts, sw_sentence, tokenizer, p entity_masks.append(entity_mask) trigger_masks.append(trigger_mask) + # relations + gtruth, l2r = gen_nn_rel_info(span_terms, relations, params) + + # events + truth_ev, ev_idxs, ev_lbls = gen_nn_truth_evs(fid, span_terms, events, events_map, params) + return { 'tokens': tokens, 'ids': ids, @@ -138,24 +166,47 @@ def get_batch_data(fid, entities, terms, valid_starts, sw_sentence, tokenizer, p 'span_labels_match_rel': span_labels_match_rel, 'entity_masks': entity_masks, 'trigger_masks': trigger_masks, - 'span_terms': span_terms + 'span_terms': span_terms, + 'gtruth': gtruth, + 'l2r': l2r, + 'truth_ev': truth_ev, + 'ev_idxs': ev_idxs, + 'ev_lbls': ev_lbls } -def get_nn_data(fids, entitiess, termss, valid_startss, sw_sentences, tokenizer, params): +def get_nn_data(fids, entitiess, termss, valid_startss, relationss, eventss, sw_sentences, tokenizer, events_map, + params): samples = [] + max_ev_per_batch = params['max_ev_per_batch'] + for idx, sw_sentence in enumerate(sw_sentences): fid = fids[idx] entities = entitiess[idx] terms = termss[idx] valid_starts = valid_startss[idx] - - sample = get_batch_data(fid, entities, terms, valid_starts, sw_sentence, tokenizer, - params) + relations = relationss[idx] + events = eventss[idx] + sample = get_batch_data(fid, entities, terms, valid_starts, relations, events, sw_sentence, tokenizer, + events_map, params) + max_ev_per_batch = max(sample['truth_ev'].shape[0], max_ev_per_batch) samples.append(sample) - all_tokens = [] + # count the number of events in truth + count_ev_truth(samples) + + print('max_ev_per_batch', max_ev_per_batch) + print('max_ev_per_layer', params['max_ev_per_layer']) + print('max_seq', params['max_seq']) + + params['max_ev_per_batch'] = max_ev_per_batch + + # for lstm + if params['use_lstm']: + all_tokens = [sample["tokens"] for sample in samples] + else: + all_tokens = [] all_ids = [sample["ids"] for sample in samples] all_token_masks = [sample["token_mask"] for sample in samples] @@ -166,6 +217,11 @@ def get_nn_data(fids, entitiess, termss, valid_startss, sw_sentences, tokenizer, all_entity_masks = [sample["entity_masks"] for sample in samples] all_trigger_masks = [sample["trigger_masks"] for sample in samples] all_span_terms = [sample["span_terms"] for sample in samples] + all_gtruth = [sample["gtruth"] for sample in samples] + all_l2r = [sample["l2r"] for sample in samples] + all_truth_ev = [sample["truth_ev"] for sample in samples] + all_ev_idxs = [sample["ev_idxs"] for sample in samples] + all_ev_lbls = [sample["ev_lbls"] for sample in samples] return { 'tokens': all_tokens, @@ -177,5 +233,10 @@ def get_nn_data(fids, entitiess, termss, valid_startss, sw_sentences, tokenizer, 
'span_labels_match_rel': all_span_labels_match_rel, 'entity_masks': all_entity_masks, 'trigger_masks': all_trigger_masks, - 'span_terms': all_span_terms + 'span_terms': all_span_terms, + 'gtruth': all_gtruth, + 'l2r': all_l2r, + 'truth_ev': all_truth_ev, + 'ev_idxs': all_ev_idxs, + 'ev_lbls': all_ev_lbls } diff --git a/loader/prepNN/structure.py b/loader/prepNN/structure.py new file mode 100644 index 0000000..53cac7a --- /dev/null +++ b/loader/prepNN/structure.py @@ -0,0 +1,283 @@ +"""Process event structures.""" + +from glob import glob +import os +import json +from loguru import logger +import collections +from collections import OrderedDict +import numpy as np + +from utils import utils + + +def load_general_rules(cur_rules, params): + + num_dups = 0 + + rule_fns = glob(os.path.join(params["rule_dir"], "*.rule")) + + for rule_fn in rule_fns: + for rule_line in utils.read_lines(rule_fn): + trigger_id, args = json.loads(rule_line) + + accumulative_level = 0 + + rel_arg_pairs = [] + rel_arg_pair_strs = [] + + if args: + for relation_id, level, arg_id in args: + accumulative_level += level + + rel_arg_pairs.append([relation_id, arg_id]) + rel_arg_pair_strs.append("{}{}{}".format(relation_id, level, arg_id)) + else: + rel_arg_pairs.append([str(None), trigger_id]) + rel_arg_pair_strs.append("{}{}{}".format(str(None), 0, trigger_id)) + + rule_str = "+".join(rel_arg_pair_strs) + + if accumulative_level > 0: + if trigger_id in cur_rules["structs1"]: + if rule_str in cur_rules["structs1"][trigger_id]: + num_dups += 1 + else: + cur_rules["structs1"][trigger_id][rule_str] = [rel_arg_pairs] + else: + if trigger_id in cur_rules["structs0"]: + if rule_str in cur_rules["structs0"][trigger_id]: + num_dups += 1 + else: + cur_rules["structs0"][trigger_id][rule_str] = [rel_arg_pairs] + + logger.debug("# Event rule duplicates: {}".format(num_dups)) + + +def remove_invalid_rules(cur_rules): + for rule_structures in cur_rules.values(): + for trigger_id in rule_structures: + for rule_str in list(rule_structures[trigger_id]): + has_relations = {relation_id for relation_id, _ in rule_structures[trigger_id][rule_str][0]} + if trigger_id == "Mutation" and "Theme" not in has_relations and ( + "CSite" in has_relations or "Site" in has_relations): + del rule_structures[trigger_id][rule_str] + logger.info("Removed an invalid rule: {} {}".format(trigger_id, rule_str)) + + +def merge_struct(train_struct, dev_struct): + for trigger_id, rule_structure in dev_struct.items(): + for rule_str, args in rule_structure.items(): + + if trigger_id not in train_struct: + # this trigger not in train set, create new + train_struct[trigger_id] = OrderedDict() + + train_struct[trigger_id][rule_str] = args + + +def count_rules(train_struct): + count = 0 + for type_tr, pairs in train_struct.items(): + count += len(pairs) + return count + + +def prep_structs_mapping(structsTR, type_map, rtype_map, rel_size): + structs_types = OrderedDict() + structs_map = OrderedDict() + + max_ev_per_tr = 0 + max_rel_per_ev = 0 + + for typeTR, structs in structsTR.items(): + typeTRid = type_map[typeTR] + structs_id = [] + structs_map[typeTR] = OrderedDict() + + max_ev_per_tr = max(max_ev_per_tr, len(structs)) + + for struct, struct_data in structs.items(): + + rel_id = [] + for rel in struct_data[0]: + if rel[0] == 'None': + r2id = (rel_size, typeTRid) + rel_id.append(r2id) + else: + if rel[0] in rtype_map and rel[1] in type_map: + r2id = (rtype_map[rel[0]], type_map[rel[1]]) + rel_id.append(r2id) + + max_rel_per_ev = max(max_rel_per_ev, len(rel_id)) + + 
# structs_map[typeTR][struct] = rel_id + if len(rel_id) == len(struct_data[0]): + rel_id_count = collections.Counter(rel_id) + structs_map[typeTR][struct] = rel_id_count + + # check to avoid duplicate + if rel_id not in structs_id: + structs_id.append(rel_id) + structs_types[typeTR] = structs_id + + return structs_map, structs_types, max_ev_per_tr, max_rel_per_ev + + +def prep_struct_map_ids(struct_map, typeTR_map, type_size, rel_size): + """ + :param struct_map: mapping for each trigger type, there is a list of event structure, each structure is a list of arguments, each argument is a pair of relation type index, entity type index + :param typeTR_map: mapping, each trigger type is assigned with an integer + :param type_size: number of entity types + trigger types + :param typeTR_size: number of trigger types + :param rel_size: number of relation types + :return: + ev_structs_ids: array[type_size x 5], for the number of argument, each element is a list object for event structures of each trigger type + ev_structs_args: array[type_size], list of arguments (pairs of (relation type, entity type)) for each trigger type + """ + # convert event structure map into indices, size=[trigger_type_size x 5_arguments] (0 is for no argument) + ev_structs_ids = -1 * np.ones((type_size + 1, 5), dtype=np.object) + # struct_arg_map = OrderedDict() + + for typeTR, structs in struct_map.items(): + trid = typeTR_map[typeTR] + + # devide and store arguments separately by the number of arguments + structs_0arg = [] # no argument + structs_1arg = [] # 1 argument + structs_2arg = [] # 2 arguments + structs_3arg = [] # 3 arguments + structs_4arg = [] # 4 arguments + # struct_arg_map[typeTR] = OrderedDict() + + for struct in structs: + args = [] + no_arg = len(struct) + for arg in struct: + args.append(arg) + + # check if it is no arg + zero_arg = False + if no_arg == 1 and args[0][0] == rel_size: + zero_arg = True + + # convert list of arg to counter: compare easier + args = collections.Counter(args) + + # store args to map + # struct_arg_map[typeTR][struct] = args + + # check if there is one argument: + if no_arg == 1: + + # it can be no argument (argument with the relation type is OTHER) + if zero_arg: + if args not in structs_0arg: + structs_0arg.append(args) + + # or it can be one arguments + else: + if args not in structs_1arg: + structs_1arg.append(args) + + # otherwise: 2, 3, 4 arguments + elif no_arg == 2: + if args not in structs_2arg: + structs_2arg.append(args) + elif no_arg == 3: + if args not in structs_3arg: + structs_3arg.append(args) + elif no_arg == 4: + if args not in structs_4arg: + structs_4arg.append(args) + + # store event structures, for each trigger type id, and for each number of argument: 0..4 + if len(structs_0arg) > 0: + ev_structs_ids[trid][0] = structs_0arg + if len(structs_1arg) > 0: + ev_structs_ids[trid][1] = structs_1arg + if len(structs_2arg) > 0: + ev_structs_ids[trid][2] = structs_2arg + if len(structs_3arg) > 0: + ev_structs_ids[trid][3] = structs_3arg + if len(structs_4arg) > 0: + ev_structs_ids[trid][4] = structs_4arg + + return ev_structs_ids + + +def prep_pair_mapping(structsTR, type_map): + etype_pairs = collections.defaultdict(set) + for _, struct_level in structsTR.items(): + for typeTR, argStructs in struct_level.items(): + for _, argStruct in argStructs.items(): + for argPair in argStruct[0]: + typeT = argPair[1] + # pair_map[typeTR].add(typeT) + if typeT in type_map and typeTR in type_map: + etype_pairs[type_map[typeTR]].add(type_map[typeT]) + + return etype_pairs + + 
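+# A minimal usage sketch of prep_pair_mapping above (hypothetical rule string and
+# type ids, not taken from any real corpus mapping): each trigger type id ends up
+# mapped to the set of type ids that may fill one of its argument slots.
+#
+#   structsTR = {'structs0': {'Binding': {'rule': [[['Theme', 'Protein']]]}}}
+#   type_map = {'Binding': 3, 'Protein': 7}
+#   prep_pair_mapping(structsTR, type_map)
+#   # -> defaultdict(set, {3: {7}})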
+def prep_pair_mapping_from_file(entity_pairs, type_map): + with open(entity_pairs, 'r') as stream: + entity_pairs = utils._ordered_load(stream) + + etype_pairs = collections.defaultdict(set) + for e, p in entity_pairs.items(): + try: + etype = type_map[e] + ps = p.split(',') + for paired_e in ps: + try: + paired_etype = type_map[paired_e] + etype_pairs[etype].add(paired_etype) + except: + pass + except: + pass + + return etype_pairs + + +def process_structure(data_struct, data_struct_dev, params, type_map, typeTR_map, rtype_map, type_size, rel_size): + structs_tr = data_struct['structsTR'] + structs_tr_dev = data_struct_dev['structsTR'] + + if params['use_dev_rule']: + merge_struct(structs_tr['structs0'], structs_tr_dev['structs0']) + merge_struct(structs_tr['structs1'], structs_tr_dev['structs1']) + + if params['use_general_rule']: + load_general_rules(structs_tr, params) + remove_invalid_rules(structs_tr) + + print('Total FLAT rules', count_rules(structs_tr['structs0'])) + print('Total NESTED rules', count_rules(structs_tr['structs1'])) + + # convert structure mapping into indices + flat_structs_map, flat_types_map, max_ev_per_tr0, max_rel_per_ev0 = prep_structs_mapping(structs_tr['structs0'], + type_map, rtype_map, + rel_size) + nested_structs_map, nested_types_map, max_ev_per_tr1, max_rel_per_ev1 = prep_structs_mapping(structs_tr['structs1'], + type_map, rtype_map, + rel_size) + + # create event structures for flat and nested events + flat_types_id_map = prep_struct_map_ids(flat_types_map, typeTR_map, type_size, rel_size) + nested_types_id_map = prep_struct_map_ids(nested_types_map, typeTR_map, type_size, rel_size) + + params['max_ev_per_tr'] = max(max_ev_per_tr0, max_ev_per_tr1, params['max_ev_per_tr']) + params['max_rel_per_ev'] = max(max_rel_per_ev0, max_rel_per_ev1, params['max_rel_per_ev']) + params['max_rel_per_ev'] += 1 + + print('max_ev_per_tr', params['max_ev_per_tr']) + print('max_rel_per_ev', params['max_rel_per_ev']) + + etype_pairs = prep_pair_mapping(structs_tr, type_map) + + # if params['using_entity_pairs_filter']: + # etype_pairs = prep_pair_mapping_from_file(params['entity_pairs'], type_map) + + return flat_structs_map, nested_structs_map, flat_types_id_map, nested_types_id_map, etype_pairs diff --git a/nets/EVGen.py b/model/EVGen.py similarity index 85% rename from nets/EVGen.py rename to model/EVGen.py index bdc413f..3e84ad8 100644 --- a/nets/EVGen.py +++ b/model/EVGen.py @@ -15,6 +15,96 @@ def __init__(self, params): # parameters self.params = params + def show_input(self, etypes, l2r, rpred_types, rpred_ids, ev_idx, ev_truth, ev_lbls): + """For debug, convert indices to real events.""" + + # print relation data + print('No., batch id, a1id, a2id, r-type') + for xx, rid in enumerate(rpred_ids): + # indices + bid = l2r[0][rid] + a1id = l2r[1][rid] + a2id = l2r[2][rid] + + # rtype + rtypeid = rpred_types[rid] + rtype = self.params['mappings']['rev_rtype_map'][rtypeid] + + # entities + a1 = self.params['debugs']['terms_map'][bid, a1id] + a2 = self.params['debugs']['terms_map'][bid, a2id] + + # show + print(xx, bid, a1id, a2id, rtype, a1, a2) + + return + + def show_rels_group(self, rels_group): + """For debug.""" + xx = 0 + for trid, rel_group in rels_group.items(): + + # store a list of (rtype-etype) + rels = [] + for rel_data in rel_group: + a1typeid = rel_data[0] + a2ids = rel_data[3] + rtypeid = rel_data[2][0] + a2typeid = rel_data[2][1] + rels.append(rel_data[2]) + + bid = trid[0] + a1id = trid[1] + a2id = a2ids[1] + a1type = 
self.params['mappings']['rev_type_map'][a1typeid] + a2type = self.params['mappings']['rev_type_map'][a2typeid] + a1 = self.params['debugs']['terms_map'][bid, a1id] + a2 = self.params['debugs']['terms_map'][bid, a2id] + rtype = self.params['mappings']['rev_rtype_map'][rtypeid] + xx += 1 + + print(xx, trid, a1, a1type, a2, a2type, rtype) + print(trid, rels) + print() + + def deb_generated_candidates(self, ev_st_candidates, etypes, ev_cand_triggers): + """For debug only, check the output of generated event structure candidates.""" + + # translate each candidate + # format: [0=trig_id, 1-ev-structure-counter, 2-ev-structure-order, 3-ev_label, 4=modality label, 5=[list IN/OUT ids] ] + for xx, ev_cand in enumerate(ev_st_candidates): + trid = ev_cand[0] + rel_group = ev_cand[1] + rel_group_list = ev_cand[2] + ev_label = ev_cand[3] + # mod_label = ev_cand[4] + # io_ids = ev_cand[5] + + bid = trid[0] + a1id = trid[1] + a1 = self.params['debugs']['terms_map'][bid, a1id] + a1typeid = etypes[(bid, a1id)].item() + a1type = self.params['mappings']['rev_type_map'][a1typeid] + + # trigger and trigger type + print(xx, a1, a1type, 'ev label=', ev_label) + + # arguments structure + rel_group_vals = list(rel_group.elements()) + for arg_ in rel_group_vals: + rtypeid = arg_[0] + a2typeid = arg_[1] + a2type = self.params['mappings']['rev_type_map'][a2typeid] + if rtypeid == self.params['voc_sizes']['rel_size']: + rtype = 'None' + else: + rtype = self.params['mappings']['rev_rtype_map'][rtypeid] + print('(', rtype, a2type, ')') + + print() + + return + def group_rels(self, l2r, rpred_types, rpred_ids, etypes): """For generating event candidates.""" @@ -48,13 +138,21 @@ def group_rels(self, l2r, rpred_types, rpred_ids, etypes): [a2typeid, rid.item(), (rtypeid, a1typeid), (bid.item(), a1id.item())]) # if both a1 and a2 are trigger: this can be for nested events + # the direction can be reverse later to make sure having enough candidates: TODO if a1typeid in self.params['trTypes_Ids'] and a2typeid in self.params['trTypes_Ids']: nest_rels_group[(bid.item(), a1id.item())].append( [a1typeid, rid.item(), (rtypeid, a2typeid), (bid.item(), a2id.item())]) + # show output for debug only + # TODO: comment when training + # print('print flat rels_group') + # self.show_rels_group(flat_rels_group) + # print('print nested rels_group') + # self.show_rels_group(nest_rels_group) + return flat_rels_group, nest_rels_group - def add_no_arg_trigger(self, tr_ids, etypes, flat_structs_map): + def add_no_arg_trigger(self, tr_ids, etypes, ev_idx, ev_truth, ev_lbls, flat_structs_map): """Add no-argument triggers.""" # store in a map: key is trigger id, value is a pair of (rtype, trigger type); rtype is a special type @@ -64,8 +162,22 @@ def add_no_arg_trigger(self, tr_ids, etypes, flat_structs_map): for trid_ in tr_ids: trid = (trid_[0].item(), trid_[1].item()) - truth = [-1] - mod_label = [-1] + # process truth and labels + bid = trid[0] + a1id = trid[1] + truth_idx = ev_idx[bid].get(a1id, -1) + if truth_idx != -1: + truth = ev_truth[bid][truth_idx] + mod_label = ev_lbls[bid][truth_idx] # modality: 1-non-modality, 2-speculation, 3-negation + + # truth for flat and nested + # flat_truth = truth[0] + # nest_truth = truth[1] + truth = truth[0] # for flat. No-argument is always flat. 
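+                # no-argument candidates only need the flat-level truth here;
+                # the nested levels of ev_truth are consumed in add_truth_to_trigger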
+ + else: + truth = [-1] + mod_label = [-1] # rtype and trigger type rtype = self.params['voc_sizes']['rel_size'] @@ -80,16 +192,33 @@ def add_no_arg_trigger(self, tr_ids, etypes, flat_structs_map): return no_arg_group - def add_truth_to_trigger(self, rels_group, structs_map, levelid=0): + def add_truth_to_trigger(self, rels_group, ev_idx, ev_truth, ev_lbls, structs_map, levelid=0): """For generating event candidates. # add event truth and labels to each trigger # levelid = 0: flat, levelid=1: nested events """ + # TODO: nested for trid, rel_group in rels_group.items(): + # get index + bid = trid[0] + a1id = trid[1] - truth = -1 * np.ones((self.params['max_ev_level'] + 1, self.params['max_ev_args'] + 1), dtype=np.object) - mod_label = [-1] + # process truth and labels + truth_idx = ev_idx[bid].get(a1id, -1) + if truth_idx != -1: + truth = ev_truth[bid][truth_idx] + mod_label = ev_lbls[bid][truth_idx] # modality: 1-non-modality, 2-speculation, 3-negation + + # truth for flat and nested + # flat_truth = truth[0] + # nest_truth = truth[1] # TODO + # truth = truth[levelid:] # for flat + + else: + # level_truth = [-1] + truth = -1 * np.ones((self.params['max_ev_level'] + 1, self.params['max_ev_args'] + 1), dtype=np.object) + mod_label = [-1] if levelid == 0: level_truth = truth[levelid] @@ -175,6 +304,7 @@ def create_multiple_flat_arg_candidates(self, trid, rels_group, args_list, n_arg # generate all possible combinations among arguments with limited by the maximum number of args max_n_args = self.params['max_ev_args'] + # TODO: now fix as 4 arguments, can revise to set in parameter later. for xx1, arg1_ in enumerate(args_list): @@ -331,6 +461,7 @@ def create_multiple_nest_arg_candidates(self, trid, rels_group, args_list, n_arg # generate all possible combinations among arguments with limited by the maximum number of args max_n_args = self.params['max_ev_args'] + # TODO: now fix as 4 arguments, can revise to set in parameter later. for xx1, arg1_ in enumerate(args_list): @@ -779,6 +910,9 @@ def add_nest_arguments(self, nest_rels_group, ev_flat_arg_ids4nn, flat_rels_grou Add reverse trigger pair if necessary. """ + # store the list of trigger and entity arguments + # nest_args = collections.OrderedDict() + # store new reversed arguments in a new dictionary rev_nest_rels_group = collections.defaultdict(list) @@ -804,7 +938,9 @@ def add_nest_arguments(self, nest_rels_group, ev_flat_arg_ids4nn, flat_rels_grou args_list.append(rel_group) # if this argument not in flat candidates, this is not a candidate, we can reverse + # TODO: or we always reverse? 
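+            # ("reverse" here means swapping the trigger and argument roles of the
+            # pair, so the nested candidate can anchor on the other trigger instead)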
else: + # if argid not in ev_flat_arg_ids4nn: # create the reverse data rev_arg = [] @@ -818,13 +954,32 @@ def add_nest_arguments(self, nest_rels_group, ev_flat_arg_ids4nn, flat_rels_grou rev_args_list.append((argid, rev_arg)) + # add as a new candidate + # if argid in nest_rels_group: + # arg_list = [a2id[3] for a2id in nest_rels_group[argid]] + + # check if this argument already exists + # if trid not in arg_list: + # nest_rels_group[argid].append(rev_arg) + # print('REVERSE TRIGGER PAIRS FOR NESTED EVENT.') + # else: + # add the reverse pair + # if argid in rev_nest_rels_group: + # rev_nest_rels_group[argid].append(rev_arg) + # else: + # rev_nest_rels_group[argid] = [rev_arg] + # print('REVERSE TRIGGER PAIRS FOR NESTED EVENT.') # the reverse also not in flat events else: no_ev_list.append([trid, rel_group]) no_ev_list.append([argid, rev_arg]) + # both directions have no flat event + # else: + # print('INVALID NESTED EVENT CANDIDATE: NO FLAT EVENT TO THE TRIGGER ARGUMENT.') + # add to the map if len(args_list) > 0: if trid in rev_nest_rels_group: @@ -880,10 +1035,13 @@ def add_nest_arguments(self, nest_rels_group, ev_flat_arg_ids4nn, flat_rels_grou ent_arg_data.append([[[a2id, (-1, -1)]]]) rel_groups.extend(ent_args_list) + # store argument list for trigger + # nest_args[trid] = [trig_args, ent_args_list] + return rev_nest_rels_group - def generate_event_candidate_structures(self, etypes, tr_ids, l2r, rpred_types, rpred_ids - ): + def generate_event_candidate_structures(self, etypes, tr_ids, l2r, rpred_types, rpred_ids, ev_idx, ev_truth, + ev_lbls): """ Generate event candidates structures. - Given a list of predicted/gold entities, triggers, relations - Given a set of EVENT STRUCTURES (rules by annotation), separated by event type (also trigger type) @@ -909,16 +1067,19 @@ def generate_event_candidate_structures(self, etypes, tr_ids, l2r, rpred_types, flat_structs_map = self.params['mappings']['flat_types_id_map'] nest_structs_map = self.params['mappings']['nested_types_id_map'] + # show input: for debug only; to convert data indices to real entities, events .., it is difficult to check by indices + # self.show_input(etypes, l2r, rpred_types, rpred_ids, ev_idx, ev_truth, ev_lbls) + # group rels for each trigger: one for flat and one for nested events flat_rels_group, nest_rels_group = self.group_rels(l2r, rpred_types, rpred_ids, etypes) # add truth, labels, and event structure to each trigger # the mapping: key=trigger id, values = a list of[ [list of relations], [truth, label, ev-structures] ] - ev_flat_cand_triggers = self.add_truth_to_trigger(flat_rels_group, flat_structs_map, + ev_flat_cand_triggers = self.add_truth_to_trigger(flat_rels_group, ev_idx, ev_truth, ev_lbls, flat_structs_map, levelid=0) # prepare for no argument candidates - ev_no_arg_cand_triggers = self.add_no_arg_trigger(tr_ids, etypes, flat_structs_map) + ev_no_arg_cand_triggers = self.add_no_arg_trigger(tr_ids, etypes, ev_idx, ev_truth, ev_lbls, flat_structs_map) # create flat event candidates using event structures ev_flat_st_candidates, ev_flat_arg_ids4nn = self.create_ev_candidates(ev_flat_cand_triggers, @@ -929,23 +1090,26 @@ def generate_event_candidate_structures(self, etypes, tr_ids, l2r, rpred_types, rev_nest_rels_group = self.add_nest_arguments(nest_rels_group, ev_flat_arg_ids4nn, flat_rels_group) # add truth: do it later after flat prediction - ev_nest_cand_triggers = self.add_truth_to_trigger(rev_nest_rels_group, + ev_nest_cand_triggers = self.add_truth_to_trigger(rev_nest_rels_group, ev_idx, ev_truth, 
ev_lbls, nest_structs_map, levelid=1) + # for debug only, show generated candidates, TODO: comment when training + # _ = self.deb_generated_candidates(ev_st_candidates, etypes, ev_cand_triggers) + # prepare for creating embeddings from event structure candidates ev_flat_cands_ids4nn = self.prepare4nn(ev_flat_st_candidates) return {'ev_cand_ids4nn': ev_flat_cands_ids4nn, 'ev_arg_ids4nn': ev_flat_arg_ids4nn, 'ev_nest_cand_triggers': ev_nest_cand_triggers} - def _generate(self, etypes, tr_ids, l2r, rpred_types, rpred_ids): + def _generate(self, etypes, tr_ids, l2r, rpred_types, rpred_ids, ev_idx, ev_truth, ev_lbls): """Generate event candidates indices for creating embeddings.""" # a map with two output: # 1-event candidate indices: a list of event candidate, [trigger id, event label, modality label, in/out ids] # 2-event argument indices for each trigger: a map (key: trigger id, values: ids of relations and entity arguments) - ev_ids4nn = self.generate_event_candidate_structures(etypes, tr_ids, l2r, rpred_types, rpred_ids - ) + ev_ids4nn = self.generate_event_candidate_structures(etypes, tr_ids, l2r, rpred_types, rpred_ids, ev_idx, + ev_truth, ev_lbls) return ev_ids4nn @@ -965,6 +1129,9 @@ def select_nest_arguments(self, nest_group_rels, flat_pos_tr_ids, flat_pos_truth # get arguments args_list = args_data[:-1] + # truth for this trigger + # rels_group = nest_group_rels[trid] + # check whether the trigger argument ids included in the predicted positive tr_ids for trig_arg_data in args_list: @@ -974,8 +1141,13 @@ def select_nest_arguments(self, nest_group_rels, flat_pos_tr_ids, flat_pos_truth # store which event id will replace trigger argument, and its truth posid_list = [] + # truth_list = [] + + # store positive ids by level + # pos_level_list = [] # check all possible appearance of this trigger in the predicted events + # TODO: Replace for loop by a better function to find all matched indices in a list for posid, pos_trid in enumerate(flat_pos_tr_ids): if argid == pos_trid: pos_truth = flat_pos_truth_ids[posid] @@ -983,16 +1155,31 @@ def select_nest_arguments(self, nest_group_rels, flat_pos_tr_ids, flat_pos_truth # only add positive truth for training if pos_truth != -1 or not self.training: # positive id: index of (level, event id) + # posid_ = (current_nested_level, posid) posid_list.append([pos_truth, (current_nested_level, posid)]) + # truth_list.append(flat_pos_truth_ids[posid]) + # add to the list of arguments by level: 4th element trig_arg_data[4].append(posid_list) # if there is predicted events if len(posid_list) > 0: + # add by level + # pos_level_list.append([truth_list, posid_list]) + + # add to the list of arguments: for only one nested level + # trig_arg_data.append(truth_list) + # trig_arg_data.append(posid_list) + + # add to the list of arguments by level: 4th element + # trig_arg_data[4].append(posid_list) + # mark this is used to make the next level nested candidate trig_arg_data[5] = 1 + # is_new_ev = True + # pos_trig_args_list.append(trig_arg_data) # otherwise: mark this event argument is not used to search for next level nested candidates else: @@ -1036,6 +1223,10 @@ def generate_candidate_output(self, trid, rel_group_counter, rel_group_list, arg matched_truth = (0, truth_[0][0], truth_[0][1], truth_[0][2]) break + # truth for negative label + # if matched_truth == -1: + # matched_truth = (0, rel_group_counter, cand_eids_count, []) + # store the output cand_output.append(trid) cand_output.append(rel_group_counter) @@ -1055,6 +1246,10 @@ def 
generate_nest_candidate_output(self, trid, rel_group_counter, rel_group_list # format: [0=trig_id, 1-ev-structure-counter, 2-ev-structure-order, 3-ev_label, 4=modality label, 5=[list IN/OUT ids] ] cand_output = [] + # TODO: comment this + # convert ids to Counter to compare + # cand_eids_count = collections.Counter(a2_ids) + ev_label = 0 mod_label = 1 diff --git a/nets/EVNet.py b/model/EVNet.py similarity index 80% rename from nets/EVNet.py rename to model/EVNet.py index 270e657..440b3b2 100644 --- a/nets/EVNet.py +++ b/model/EVNet.py @@ -5,24 +5,19 @@ import collections import torch +from torch.autograd import Variable from torch import nn import torch.nn.functional as F -cpu_device = torch.device("cpu") - -# use gelu instead of relu activation function -import math - - -def gelu(x): - return 0.5 * x * (1 + torch.tanh(math.sqrt(math.pi / 2) * (x + 0.044715 * x ** 3))) +from utils.utils import gelu +cpu_device = torch.device("cpu") -from nets.EVGen import EV_Generator +from model.EVGen import EV_Generator class EVModel(nn.Module): - """CLASS FOR EVENT LAYERS.""" + """Event layer.""" def __init__(self, params, sizes): super(EVModel, self).__init__() @@ -32,7 +27,7 @@ def __init__(self, params, sizes): # dimensions if params['ner_reduce'] == False: - ent_dim = params['bert_dim'] * 3 + params['etype_dim'] # no reduce + ent_dim = params['bert_dim'] * 3 + params['etype_dim'] else: ent_dim = params['ner_reduced_size'] + params['etype_dim'] rel_dim = params['rel_reduced_size'] + params['rtype_dim'] + ent_dim @@ -61,23 +56,74 @@ def __init__(self, params, sizes): # predict modality self.modality_layer = nn.Linear(in_features=params['ev_reduced_size'], out_features=sizes['ev_size']) + # for ev loss + self.bce_with_logits_loss = nn.BCEWithLogitsLoss() + # others self.device = params['device'] def get_rel_input(self, rel_preds): """Read relation input.""" - l2r = rel_preds['pairs_idx'] - rpreds_ = rel_preds['preds'].data + # span indices for events + # training mode + if not self.params['predict']: + if self.training and self.params['use_gold_ner']: + gold_span = True + # train event only + elif not self.training and self.params['skip_ner'] and self.params['skip_rel'] and self.params[ + 'use_gold_ner']: + gold_span = True + else: + gold_span = False - # mapping relation type for 'OTHER' type to -1 - rpred_types = self.params['mappings']['rel2rtype_map'][rpreds_] + # predict mode + else: + if self.params['predict'] and (self.params['gold_eval'] or self.params['pipelines']): + gold_span = True + else: + gold_span = False - # extract only relation type != 'OTHER' (valid relations) - rpred_ids = (rpreds_ != self.params['voc_sizes']['rel_size'] - 1).nonzero().transpose(0, 1)[0] - rpred_ids = rpred_ids.to(cpu_device) # list: contain indices of the valid relations + # span indices + if gold_span: + span_indices = rel_preds['l2r'] + else: + span_indices = rel_preds['pairs_idx'] + + # relation indices for events + # training mode + if not self.params['predict']: + if self.training and self.params['use_gold_rel']: + gold_rel = True + + # train event only + elif not self.training and self.params['skip_ner'] and self.params['skip_rel'] and self.params[ + 'use_gold_rel']: + gold_rel = True + else: + gold_rel = False - return l2r, rpred_types, rpred_ids + # predict mode + else: + if self.params['predict'] and (self.params['gold_eval'] or self.params['pipelines']): + gold_rel = True + else: + gold_rel = False + + # relation indices + if gold_rel: + r_indices = rel_preds['truth'].data + else: + r_indices = 
rel_preds['preds'].data + + # relation type; non-relation to -1 + r_types = self.params['mappings']['rel2rtype_map'][r_indices] + + # extract positive relations, ignore non-relation + rpos_indices = (r_indices != self.params['voc_sizes']['rel_size'] - 1).nonzero().transpose(0, 1)[0] + rpos_indices = rpos_indices.to(cpu_device) + + return span_indices, r_types, rpos_indices def rtype_embedding_layer(self, rtype_): """Relation type embeddings.""" @@ -130,6 +176,7 @@ def get_arg_embeds(self, ent_embeds, rel_embeds, rtype_embeds, ev_arg_ids4nn): a2_embeds = ent_embeds[(a2ids_[0], a2ids_[1])] rt_embeds = rtype_embeds[rids] + args_embeds = torch.cat([r_embeds, rt_embeds, a2_embeds], dim=-1) # [number of arguments, rdim+rtypedim+edim] @@ -164,6 +211,7 @@ def event_representation(self, arg_embed_triggers, ev_cand_ids4nn, no_rel_type_e # no-argument if len(ev_struct[1]) == 0: + # arg_embed = concat[rel_embed, rel_type_embed, argument_embed] # since there is no argument, rel_embed is set as zeros no_rel_emb = torch.zeros((self.params['rel_reduced_size']), dtype=no_rel_type_embed.dtype, device=self.device) @@ -214,8 +262,12 @@ def event_representation(self, arg_embed_triggers, ev_cand_ids4nn, no_rel_type_e args_embeds_list.append(reduced_arg_embed) # calculate argument embed: by sum up all arguments or average, etc + # TODO: currently, use SUM args_embed = torch.sum(torch.stack(args_embeds_list, dim=0), dim=0) + # TODO: average + # args_embed = torch.mean(torch.stack(args_embeds_list, dim=0),dim=0) + # event embed: concatenate trigger embed and argument embed ev_embeds_.append(torch.cat([tr_embed, args_embed], dim=-1)) @@ -313,6 +365,7 @@ def event_nest_representation(self, arg_embed_triggers, ev_cand_ids4nn, no_rel_t # no-argument if len(ev_struct[1]) == 0: + # arg_embed = concat[rel_embed, rel_type_embed, argument_embed] # since there is no argument, rel_embed is set as zeros no_rel_emb = torch.zeros((self.params['rel_reduced_size']), dtype=no_rel_type_embed.dtype, device=self.device) @@ -363,6 +416,7 @@ def event_nest_representation(self, arg_embed_triggers, ev_cand_ids4nn, no_rel_t for xx2, inid in enumerate(io_ids): if inid == ioid: pid = pos_ids[xx2] + # pid = pos_ids[io_ids.index(ioid)] # entity argument if pid == (-1, -1): @@ -394,8 +448,12 @@ def event_nest_representation(self, arg_embed_triggers, ev_cand_ids4nn, no_rel_t # args_embeds_list.append(reduced_arg_embed) # calculate argument embed: by sum up all arguments or average, etc + # TODO: currently, use SUM args_embed = torch.sum(torch.stack(args_embeds_list, dim=0), dim=0) + # TODO: average + # args_embed = torch.mean(torch.stack(args_embeds_list, dim=0),dim=0) + # event embed: concatenate trigger embed and argument embed ev_embeds_.append(torch.cat([tr_embed, args_embed], dim=-1)) @@ -414,6 +472,7 @@ def predict(self, event_embeds): threshold = self.params['ev_threshold'] + event4class = gelu(self.hidden_layer1(event_embeds)) event4class = gelu(self.hidden_layer2(event4class)) prediction = self.l_class(event4class) @@ -431,11 +490,22 @@ def predict(self, event_embeds): prediction = prediction.flatten() - # return prediction, modality_pred, positive_idx, positive_ev # revise return event4class, prediction, positive_idx, positive_ev_embs + def calculate_ev_loss(self, prediction, ev_labels_): + """Loss.""" + + ev_labels = np.vstack(ev_labels_).ravel() + + positive_labels = ev_labels.copy() + positive_labels[positive_labels > 0] = 1 + ev_loss = self.bce_with_logits_loss(prediction, + torch.tensor(positive_labels, dtype=prediction.dtype, 
device=self.device)) + + return ev_loss + def predict_modality(self, positive_ev_embs, positive_ev_idx, mod_labels_): - """Predict modality, return modality predictions.""" + """Predict modality, return modality predictions and loss.""" # get labels mod_labels = np.vstack(mod_labels_).ravel() @@ -444,7 +514,7 @@ def predict_modality(self, positive_ev_embs, positive_ev_idx, mod_labels_): possitive_lbl = torch.tensor((mod_labels[positive_ev_idx] - 1), dtype=torch.long, device=self.device) - # prediction + # prediction and loss if possitive_lbl[possitive_lbl >= 0].shape[0] > 0: # prediction @@ -455,11 +525,18 @@ def predict_modality(self, positive_ev_embs, positive_ev_idx, mod_labels_): modality_pred = F.softmax(torch.tensor(modality_pred), dim=-1).data mod_preds = modality_pred.argmax(dim=-1) + # loss + modality_lbls = possitive_lbl[possitive_lbl >= 0] + mod_loss = F.cross_entropy(modality_preds, modality_lbls) + + # TODO: for debug only, remember to commend; modality pred=gold + # mod_preds = modality_lbls else: mod_preds = [] + mod_loss = 0 - return mod_preds + return mod_preds, mod_loss def create_output(self, all_ev_preds): """Create output for writing events.""" @@ -537,9 +614,9 @@ def create_output(self, all_ev_preds): return all_ev_output - def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn): + def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch): """ - Create embeddings, prediction. + Create embeddings, prediction, loss. :param ent_embeds: [batch x a1id x embeds] :param rel_embeds: [rids x embeds] @@ -554,16 +631,26 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn): + list of rids + list of argument ids - :return: prediction + :return: prediction, loss """ # store output all_preds_output = [] - enable_nested_ev = True - enable_modality = True + # flag to train nested event or not + if n_epoch >= self.params['ev_nested_epoch'] or self.params['predict']: + enable_nested_ev = True + else: + enable_nested_ev = False + + # flag to train modality or not + if n_epoch >= self.params['modality_epoch'] or self.params['predict']: + enable_modality = True + else: + enable_modality = False # store all predictions for flat and nested, maximum as 3 nested levels + # TODO: revise the maximum nested level later. 
Now fix 3 levels # positive ids: the current predicted indices; tr_ids: trigger indices of the candidate list all_positive_ids = -1 * np.ones((self.params['max_ev_level'] + 1), dtype=np.object) all_positive_tr_ids = -1 * np.ones((self.params['max_ev_level'] + 1), dtype=np.object) @@ -589,12 +676,13 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn): # positive_ev_embs: embedding of predicted events: using for the next nested level event4class, prediction, positive_idx, positive_ev_embs = self.predict(ev_embeds) - empty_pred = True + # 6-ev loss + flat_ev_loss = self.calculate_ev_loss(prediction, ev_flat_cand_ids4nn['ev_labels_']) # for modality if enable_modality: - mod_preds = self.predict_modality(positive_ev_embs, positive_idx, - ev_flat_cand_ids4nn['mod_labels_']) + mod_preds, mod_losses = self.predict_modality(positive_ev_embs, positive_idx, + ev_flat_cand_ids4nn['mod_labels_']) else: mod_preds = [] @@ -611,6 +699,9 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn): # for output all_preds_output.append([ev_flat_cand_ids4nn, ev_flat_arg_ids4nn, current_positive_ids, mod_preds]) + # nested loss + nest_ev_loss = 0 + # loop until stop nested event prediction or no more events predicted, or in limited nested levels while enable_nested_ev and len(current_positive_ids) > 0 and current_nested_level < self.params['max_ev_level']: @@ -625,6 +716,7 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn): all_positive_ev_embs.append(reduced_ev_emb) # generate nested candidate indices + # 'ev_nest_cand_ids': ev_nest_cands_ids4nn, 'ev_nest_arg_ids4nn': ev_nest_arg_ids4nn ev_nest_ids4nn = self.ev_struct_generator._generate_nested_candidates(current_nested_level, ev_nest_cand_triggers, current_positive_tr_ids, @@ -637,8 +729,6 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn): current_tr_ids = ev_nest_cand_ids4nn['trids_'] current_truth_ids = ev_nest_cand_ids4nn['truth_ids_'] - empty_pred = False - # check non-empty if len(ev_nest_cand_ids4nn['trids_']) > 0: @@ -654,10 +744,14 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn): # prediction event4class, prediction, positive_idx, positive_ev_embs = self.predict(ev_embeds) + # ev loss + nest_ev_loss += self.calculate_ev_loss(prediction, ev_nest_cand_ids4nn['ev_labels_']) + # for modality if enable_modality: - mod_preds = self.predict_modality(positive_ev_embs, positive_idx, - ev_nest_cand_ids4nn['mod_labels_']) + mod_preds, mod_loss = self.predict_modality(positive_ev_embs, positive_idx, + ev_nest_cand_ids4nn['mod_labels_']) + mod_losses += mod_loss else: mod_preds = [] @@ -677,16 +771,27 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn): # 7-create output for writing events pred_ev_output = self.create_output(all_preds_output) - return pred_ev_output, empty_pred + # scale loss: if flat is stable, focus more on nested + if current_nested_level == 0: + ev_loss = flat_ev_loss + else: + ev_loss = flat_ev_loss * self.params['flat_ev_scale'] + nest_ev_loss * self.params['nest_ev_scale'] + + # add modality loss + if enable_modality: + ev_loss = ev_loss + mod_losses * self.params['modality_weight'] + + return pred_ev_output, ev_loss - def forward(self, ner_preds, rel_preds): + def forward(self, ner_preds, rel_preds, n_epoch): """Forward. - Given entities and relations, event structures, return event prediction. + Given entities and relations, event structures, return event prediction and loss. 
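+        Returns None if there are no predicted relations or no event candidates
+        could be built; otherwise returns {'output': ..., 'loss': ...}.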
""" # check empty relation prediction if len(rel_preds['preds'].data) == 0: - ev_preds = None - empty_pred = True + # ev_out = None + # ev_loss = Variable(torch.zeros(1, device=self.device)) + return None else: # 1-get input @@ -706,22 +811,28 @@ def forward(self, ner_preds, rel_preds): if np.ndim(rpred_types) > 0: rel_embeds = rel_preds['rel_embeds'] else: - rel_embeds = torch.zeros((1, self.params['rel_reduced_size']), dtype=torch.float32, device=self.device) + rel_embeds = torch.zeros((1,self.params['rel_reduced_size']), dtype=torch.float32, device=self.device) # avoid scalar error rpred_types = np.array([rpred_types]) + + # event + ev_idx = ner_preds['ev_idxs'] + ev_truth = ner_preds['truth_evs'] + ev_lbls = np.array(ner_preds['ev_lbls']) + # 2-generate event candidates - ev_ids4nn = self.ev_struct_generator._generate(etypes, tr_ids, l2r, rpred_types, rpred_ids - ) + ev_ids4nn = self.ev_struct_generator._generate(etypes, tr_ids, l2r, rpred_types, rpred_ids, ev_idx, + ev_truth, ev_lbls) - # 3-embeds, prediction + # 3-embeds, prediction, and loss # check empty if len(ev_ids4nn['ev_cand_ids4nn']['trids_']) > 0: - ev_preds, empty_pred = self.calculate(ent_embeds, rel_embeds, rpred_types, ev_ids4nn) + ev_out, ev_loss = self.calculate(ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch) + return {'output': ev_out, 'loss': ev_loss} else: - ev_preds = None - empty_pred = True - - return ev_preds, empty_pred + # ev_out = None + # ev_loss = Variable(torch.zeros(1, device=self.device)) + return None \ No newline at end of file diff --git a/nets/NERNet.py b/model/NERNet.py similarity index 70% rename from nets/NERNet.py rename to model/NERNet.py index 6e8ccbc..58cc5ab 100644 --- a/nets/NERNet.py +++ b/model/NERNet.py @@ -2,8 +2,11 @@ import numpy as np import torch import torch.nn as nn +from torch.nn import functional as F -from bert.modeling import BertModel, BertPreTrainedModel +from torchnlp.word_to_vector.pretrained_word_vectors import _PretrainedWordVectors + +from bert.modeling import BertModel, BertPreTrainedModel, BertLayerNorm class NestedNERModel(BertPreTrainedModel): @@ -20,12 +23,44 @@ def __init__(self, config, params): self.max_span_width = params["max_span_width"] - self.bert = BertModel(config) + # for lstm + if self.params['use_lstm']: + self.pretrain_word_vectors = _PretrainedWordVectors( + name=params["pretrain_word_model"], + cache="caches", + ) + + self.lstm = nn.LSTM( + input_size=self.pretrain_word_vectors.dim, + hidden_size=config.hidden_size // 2, + num_layers=2, + batch_first=True, + dropout=config.hidden_dropout_prob, + bidirectional=True, + ) + + # or bert + else: + self.bert = BertModel(config) self.dropout = nn.Dropout(config.hidden_dropout_prob) - self.entity_classifier = nn.Linear(config.hidden_size * 3, self.num_entities) - self.trigger_classifier = nn.Linear(config.hidden_size * 3, self.num_triggers) + if params['ner_reduce']: + reduced_size = params['ner_reduced_size'] + + # ! 
REDUCE + self.reduce = nn.Sequential( + nn.Linear(config.hidden_size * 3, reduced_size), + # nn.ReLU(), + # nn.Linear(1024, 1024), + BertLayerNorm(reduced_size, eps=1e-12), + nn.Dropout(config.hidden_dropout_prob), + ) + self.entity_classifier = nn.Linear(reduced_size, self.num_entities) + self.trigger_classifier = nn.Linear(reduced_size, self.num_triggers) + else: + self.entity_classifier = nn.Linear(config.hidden_size * 3, self.num_entities) + self.trigger_classifier = nn.Linear(config.hidden_size * 3, self.num_triggers) self.register_buffer( "label_ids", @@ -50,9 +85,25 @@ def forward( device = all_ids.device max_span_width = self.max_span_width - embeddings, sentence_embedding = self.bert( + # use bert + if self.params['use_lstm']: + word_embeddings = torch.stack([self.pretrain_word_vectors[tokens].to(device=device) for tokens in all_tokens]) + + self.lstm.flatten_parameters() + + lstm_embeddings, _ = self.lstm(word_embeddings) + + embeddings = lstm_embeddings + sentence_embedding = lstm_embeddings[:, 0] + + # or bert + else: + embeddings, sentence_embedding = self.bert( all_ids, attention_mask=all_attention_masks, output_all_encoded_layers=False - ) # (B, S, H) (B, 128, 768) + ) # (B, S, H) (B, 128, 768) + + # ! REDUCE + # embeddings = self.dropout(embeddings) # (B, S, H) (B, 128, 768) flattened_token_masks = all_token_masks.flatten() # (B * S, ) @@ -62,9 +113,17 @@ def forward( flattened_token_masks ) # (all_actual_tokens, ) - flattened_embeddings = torch.index_select( + # for lstm + if self.params['use_lstm']: + flattened_embeddings = torch.index_select( + embeddings.reshape(-1, embeddings.size(-1)), 0, flattened_embedding_indices + ) # (all_actual_tokens, H) + + # or bert + else: + flattened_embeddings = torch.index_select( embeddings.view(-1, embeddings.size(-1)), 0, flattened_embedding_indices - ) # (all_actual_tokens, H) + ) # (all_actual_tokens, H) span_starts = ( torch.arange(flattened_embeddings.size(0), device=device) @@ -177,6 +236,7 @@ def forward( span_start_embeddings, span_mean_embeddings, span_end_embeddings, + # span_width_embeddings, ), dim=1, ) # (all_valid_spans, H * 3 + distance_dim) @@ -210,6 +270,16 @@ def forward( all_span_masks ] # (all_valid_spans, num_entities + num_triggers) + actual_trigger_labels, actual_entity_labels = torch.split( + actual_span_labels, [self.num_triggers, self.num_entities], dim=-1 + ) # (all_valid_spans, num_entities), (all_valid_spans, num_triggers) + + # criterion = nn.CrossEntropyLoss(weight=self.class_weights) + + # return F.binary_cross_entropy_with_logits( + # preds, actual_span_labels, weight=self.class_weights + # ) # Computes loss + all_preds = torch.cat( (trigger_preds, entity_preds), dim=-1 ) # (all_valid_spans, num_entities + num_triggers) @@ -223,6 +293,16 @@ def forward( all_preds[~all_trigger_masks, : self.num_triggers] = 0 all_preds[~all_entity_masks, self.num_triggers:] = 0 + # Compute entity loss + entity_loss = F.binary_cross_entropy_with_logits( + entity_preds[all_entity_masks], actual_entity_labels[all_entity_masks] + ) + + # Compute trigger loss + trigger_loss = F.binary_cross_entropy_with_logits( + trigger_preds[all_trigger_masks], actual_trigger_labels[all_trigger_masks] + ) + # Support for random-noise adding trick entity_coeff = all_entity_masks.sum().float() trigger_coeff = all_trigger_masks.sum().float() @@ -231,6 +311,14 @@ def forward( entity_coeff /= denominator trigger_coeff /= denominator + if self.num_triggers > 0: + total_loss = entity_coeff * entity_loss + trigger_coeff * trigger_loss + else: + 
total_loss = entity_coeff * entity_loss + + # In case the corpus don't have triggers + # total_loss = entity_loss + _, all_preds_top_indices = torch.topk(all_preds, k=self.ner_label_limit, dim=-1) # Convert binary value to label ids @@ -268,7 +356,13 @@ def forward( all_aligned_preds = np.array(all_aligned_preds) + # For checking, will be commented if passes for all tests + # assert ( + # np.sort(all_aligned_preds, axis=-1) == np.sort(all_preds, axis=-1) + # ).all() + return ( + total_loss, all_aligned_preds, all_golds, sentence_sections, diff --git a/model/RELNet.py b/model/RELNet.py new file mode 100644 index 0000000..e511ae2 --- /dev/null +++ b/model/RELNet.py @@ -0,0 +1,309 @@ +import numpy as np +import torch +import torch.nn.functional as f +from torch import nn + +from eval.evalRE import calc_stats +from utils.utils import gelu + + +class RELModel(nn.Module): + """Relation layer.""" + + def __init__(self, params, sizes): + super(RELModel, self).__init__() + + # entity type + self.type_embed = nn.Embedding(num_embeddings=sizes['etype_size'] + 1, + embedding_dim=params['etype_dim'], + padding_idx=sizes['etype_size']) + + # entity dim + if params['ner_reduce'] == False: + ent_dim = params['bert_dim'] * 3 + params['etype_dim'] + else: + ent_dim = params['ner_reduced_size'] + params['etype_dim'] + + # layers + self.hidden_layer1 = nn.Linear(in_features=2 * ent_dim + params['bert_dim'], + out_features=params['hidden_dim'], bias=False) + self.hidden_layer2 = nn.Linear(in_features=params['hidden_dim'], + out_features=params['rel_reduced_size'], bias=False) + self.l_class = nn.Linear(in_features=params['rel_reduced_size'], + out_features=sizes['rel_size']) + + # others + self.device = params['device'] + self.params = params + self.sizes = sizes + + def _create_type_representation(self, bert_embeds, etypes_): + """Create entity type embeddings""" + + # get dim + self.b, self.w, _ = bert_embeds.shape + self.e = etypes_.shape[1] + + # non-entity + etypes_[etypes_ == -1] = self.sizes['etype_size'] + + # type embeddings + etype_embeds = self.type_embed(etypes_) # (batch_size, entity_dim, type_dim) + + return etype_embeds + + def _create_pair_representation(self, etok_embeds, etype_embeds): + """Create entity pair embeddings: Represent a sentence as a matrix of shape(B, E, E, dim)""" + + # concat: entities token and type embeddings + pair_embeds = torch.cat((etok_embeds, etype_embeds), dim=2) + + # save for event layer + type2_embeds = pair_embeds.clone() + + return pair_embeds, type2_embeds + + def _generate_l2r_pairs(self, pair_embeds, s_embeds, indices, rgtruth): + """Generate left-to-right pair candidates embeddings""" + + # pair embeddings + l2r_embeds = torch.cat( + (pair_embeds[(indices[0], indices[1])], pair_embeds[(indices[0], indices[2])], s_embeds[indices[0]]), + dim=-1) + + # pair labels + l2r_truth = [] + for b, l, r in zip(indices[0], indices[1], indices[2]): + l2r_truth.append(rgtruth[b.item()].get((l.item(), r.item()), -1)) + l2r_truth = np.asarray(l2r_truth) + + return l2r_embeds, l2r_truth + + def _generate_r2l_pairs(self, pair_embeds, s_embeds, indices, rgtruth): + """Generate right-to-left pair candidates embeddings""" + + # pair embeddings + r2l_embeds = torch.cat( + (pair_embeds[(indices[0], indices[2])], pair_embeds[(indices[0], indices[1])], s_embeds[indices[0]]), + dim=-1) + + # pair labels + r2l_truth = [] + for b, r, l in zip(indices[0], indices[2], indices[1]): + r2l_truth.append(rgtruth[b.item()].get((r.item(), l.item()), -1)) + r2l_truth = np.asarray(r2l_truth) + + 
return r2l_embeds, r2l_truth + + def _transpose_gold_indices(self, g_indices_): + """Extract gold pairs indices""" + # gold indices: batch, left, right + gids_b = [] + gids_l = [] + gids_r = [] + for b_idx, l2r_batch in enumerate(g_indices_): + if l2r_batch: + gids_b.extend([b_idx] * len(l2r_batch[0])) + gids_l.extend(l2r_batch[0]) + gids_r.extend(l2r_batch[1]) + g_indices = np.asarray([gids_b, gids_l, gids_r]) + return g_indices + + def predict(self, pair_embeds, g_indices_, p_indices, rgtruth_, sent_embeds): + """Classify relations.""" + + # 1-dropout + if self.training: + if self.params['dropout'] > 0: + pair_embeds = f.dropout(pair_embeds, p=self.params['dropout']) + + # 2-transpose gold pairs indices + g_indices = self._transpose_gold_indices(g_indices_) + + # 3-create left-to-right pairs + # 3.1-training mode + if not self.params['predict']: + + # i-gold ner + if self.training and self.params['use_gold_ner']: + use_gold = True + + # ii-train relation only: use gold ner + elif not self.training and self.params['skip_ner'] and self.params['rel_epoch'] >= ( + self.params['epoch'] - 1) and self.params['use_gold_ner']: + use_gold = True + + # iii-train event only: use gold rel + elif not self.training and self.params['skip_ner'] and self.params['skip_rel'] and self.params[ + 'use_gold_rel']: + use_gold = True + + # iv- + else: + use_gold = False + + # 3.2-predict mode + else: + + # gold or pipeline + if self.params['gold_eval'] or self.params['pipelines']: + if self.params['pipelines'] and self.params['pipe_flag'] != 2: + use_gold = False + else: + use_gold = True + + # joint + else: + use_gold = False + + # 3.3-get pair candidates embeddings and labels: from gold or predicted indices + if use_gold: + l2r_embeds, l2r_truth = self._generate_l2r_pairs(pair_embeds, sent_embeds, g_indices, rgtruth_) + else: + l2r_embeds, l2r_truth = self._generate_l2r_pairs(pair_embeds, sent_embeds, p_indices, rgtruth_) + + # 4-for non-relation label + if not self.params['predict']: + if np.ndim(l2r_truth) > 0: + l2r_truth[l2r_truth == -1] = self.params['mappings']['rel_map']['1:Other:2'] + else: + if l2r_truth == -1: + l2r_truth = self.params['mappings']['rel_map']['1:Other:2'] + l2r_truth = np.array([l2r_truth]) + + # 5-NN on left-to-right pairs + rel_l2r_embeds = gelu(self.hidden_layer1(l2r_embeds)) + rel_l2r_embeds = gelu(self.hidden_layer2(rel_l2r_embeds)) + l2r_preds = self.l_class(rel_l2r_embeds) # (B*r, N) + + # 6-check dim + if not self.params['predict']: + assert (l2r_preds.shape[0] == l2r_truth.shape[0]), \ + "mismatch in ground-truth & prediction shapes left-to-right" + + # 7-both directions + if self.params['direction'] != 'l2r': + + # training mode + if not self.params['predict']: + + # i-gold ner + if self.training and self.params['use_gold_ner']: + use_gold = True + + # ii-train rel only + elif not self.training and self.params['skip_ner'] and self.params['rel_epoch'] >= ( + self.params['epoch'] - 1) and self.params['use_gold_ner']: + use_gold = True + + # iii-train ev only + elif not self.training and self.params['skip_ner'] and self.params['skip_rel'] and self.params[ + 'use_gold_rel']: + use_gold = True + + # iv + else: + use_gold = False + + # predict mode + else: + + # gold or pipeline + if self.params['gold_eval'] or self.params['pipelines']: + if self.params['pipelines'] and self.params['pipe_flag'] != 2: + use_gold = False + else: + use_gold = True + else: + use_gold = False + + # pair candidates embeddings and labels + if use_gold: + r2l_embeds, r2l_truth = 
self._generate_r2l_pairs(pair_embeds, sent_embeds, g_indices, rgtruth_) + else: + r2l_embeds, r2l_truth = self._generate_r2l_pairs(pair_embeds, sent_embeds, p_indices, rgtruth_) + + # non-relation type + if not self.params['predict']: + if np.ndim(r2l_truth) > 0: + r2l_truth[r2l_truth == -1] = self.params['mappings']['rel_map']['1:Other:2'] + else: + if r2l_truth == -1: + r2l_truth = self.params['mappings']['rel_map']['1:Other:2'] + r2l_truth = np.array([r2l_truth]) + + # NN for right-to-left pairs + rel_r2l_embeds = gelu(self.hidden_layer1(r2l_embeds)) + rel_r2l_embeds = gelu(self.hidden_layer2(rel_r2l_embeds)) + r2l_preds = self.l_class(rel_r2l_embeds) + + # check dim + if not self.params['predict']: + assert (r2l_preds.shape[0] == r2l_truth.shape[0]), \ + "mismatch in ground-truth & prediction shapes right-to-left" + + # both directions + return rel_l2r_embeds, l2r_preds, l2r_truth, rel_r2l_embeds, r2l_preds, r2l_truth, pair_embeds, g_indices + + # only left-to-right + else: + return rel_l2r_embeds, l2r_preds, l2r_truth, pair_embeds, g_indices + + def forward(self, batch_input): + + # 1-entity type embeddings + type_embeds = self._create_type_representation(batch_input['embeddings'], batch_input['ent_types']) + + # 2-create pair embeddings + pair_embeds, type2_embeds = self._create_pair_representation(batch_input['ent_embeds'], type_embeds) + pair_embeds = pair_embeds.view(self.b, self.e, pair_embeds.shape[2]) + + # 3-predictions and labels + predictions = self.predict(pair_embeds, batch_input['l2rs'], batch_input['pairs_idx'], batch_input['gtruths'], + batch_input['sentence_embeds']) + + # 4-classify: use both directions + acc_loss = 0 + if self.params['direction'] != 'lr2': + + # get output + rel_l2r_embeds, l2r_preds, l2r_truth, rel_r2l_embeds, r2l_preds, r2l_truth, pair_embeds, g_indices = predictions + + # training + if not self.params['predict']: + if l2r_preds.shape[0] == 0: + return {'valid': False} + + # relation loss + l2r_loss = f.cross_entropy(l2r_preds, torch.tensor(l2r_truth, device=self.device).long()) + r2l_loss = f.cross_entropy(r2l_preds, torch.tensor(r2l_truth, device=self.device).long()) + acc_loss = l2r_loss + r2l_loss + + # prediction and label + r_preds = (f.softmax(l2r_preds, dim=1).data, f.softmax(r2l_preds, dim=1).data) + r_gtruth = (l2r_truth, r2l_truth) + + # use only left-to-right direction + else: + + # get output + rel_l2r_embeds, l2r_preds, l2r_truth, pair_embeds, g_indices = predictions + + # training + if not self.params['predict']: + # relation loss + acc_loss = f.cross_entropy(l2r_preds, torch.tensor(l2r_truth, device=self.device).long()) + + # prediction and label + r_preds = f.softmax(l2r_preds, dim=1).data + r_gtruth = l2r_truth.data + + # get predicted type and scores + new_rpreds, new_rgtruth, no_matched_rels, true_pos, false_pos, false_neg = calc_stats(r_preds, r_gtruth, + self.params) + + return {'valid': True, 'true_pos': true_pos, 'false_pos': false_pos, 'false_neg': false_neg, + 'preds': new_rpreds, 'enttoks_type_embeds': type2_embeds, + 'truth': new_rgtruth, 'no_matched_rel': no_matched_rels, + 'l2r': g_indices, 'pairs_idx': batch_input['pairs_idx'], 'rel_embeds': rel_l2r_embeds, + 'pair4class': pair_embeds, 'loss': acc_loss} diff --git a/model/deepEM.py b/model/deepEM.py new file mode 100644 index 0000000..4e8958c --- /dev/null +++ b/model/deepEM.py @@ -0,0 +1,484 @@ +import copy +from collections import defaultdict + +import numpy as np +import torch +import torch.nn.functional as f +from torch import nn +from torch.autograd import 
Variable + +from model import EVNet +from model import RELNet +from model.NERNet import NestedNERModel +from utils import utils + +cpu_device = torch.device("cpu") + + +class DeepEM(nn.Module): + """ + Network architecture + """ + + def __init__(self, params): + super(DeepEM, self).__init__() + + sizes = params['voc_sizes'] + device = params['device'] + + self.NER_layer = NestedNERModel.from_pretrained(params['bert_model'], params=params) + self.REL_layer = RELNet.RELModel(params, sizes) + self.EV_layer = EVNet.EVModel(params, sizes) + + self.trigger_id = -1 + + if params['train']: + self.beta = 1 + else: + self.beta = params['beta'] + + self.device = device + self.params = params + + def process_ner_output(self, nn_tokens, nn_ids, nn_token_mask, nn_attention_mask, nn_entity_masks, nn_trigger_masks, + nn_span_labels, span_terms, max_span_labels, nn_span_indices): + """Process NER output to prepare for training relation and event layers""" + + # entity output + ner_preds = {} + + # predict entity + ner_loss, e_preds, e_golds, sentence_sections, span_masks, embeddings, sentence_emb, trigger_indices = self.NER_layer( + all_tokens=nn_tokens, + all_ids=nn_ids, + all_token_masks=nn_token_mask, + all_attention_masks=nn_attention_mask, + all_entity_masks=nn_entity_masks, + all_trigger_masks=nn_trigger_masks, + all_span_labels=nn_span_labels, + ) + + # ! Note that these below lines run on CPU + sentence_sections = sentence_sections.detach().cpu().numpy()[:-1] + all_span_masks = span_masks.detach() > 0 + + # Embedding of each span + embeddings = torch.split(embeddings, torch.sum(all_span_masks, dim=-1).tolist()) + + # Pred of each span + e_preds = np.split(e_preds.astype(int), sentence_sections) + e_preds = [pred.flatten() for pred in e_preds] + ner_preds['preds'] = e_preds + + e_golds = np.split(e_golds.astype(int), sentence_sections) + e_golds = [gold.flatten() for gold in e_golds] + ner_preds['golds'] = e_golds + ner_preds['gold_terms'] = copy.deepcopy(span_terms) + + replace_term = True + if self.params['predict']: + if self.params['gold_eval'] or (self.params['pipelines'] and self.params['pipe_flag'] != 0): + replace_term = False + + if self.params["ner_predict_all"]: + if self.params['predict']: + if self.params['gold_eval'] or (self.params['pipelines'] and self.params['pipe_flag'] != 0): + e_preds = e_golds + span_terms = ner_preds['gold_terms'] + else: + if self.params['skip_ner'] and self.params['skip_rel'] and self.params['use_gold_ner'] and self.params[ + 'use_gold_rel']: + e_preds = e_golds + span_terms = ner_preds['gold_terms'] + + if replace_term: + for items in span_terms: + items.term2id.clear() + items.id2term.clear() + + # Overwrite triggers + if self.trigger_id == -1: + self.trigger_id = utils.get_max_entity_id(span_terms) + 10000 + + trigger_idx = self.trigger_id + 1 + for sentence_idx, span_preds in enumerate(e_preds): + for pred_idx, label_id in enumerate(span_preds): + if label_id > 0: + term = "T" + str(trigger_idx) + + # check trigger + if label_id in self.params['mappings']['nn_mapping']['trTypes_Ids']: + term = "TR" + str(trigger_idx) + + span_terms[sentence_idx].id2term[pred_idx] = term + span_terms[sentence_idx].term2id[term] = pred_idx + trigger_idx += 1 + + self.trigger_id = trigger_idx + else: + if replace_term: + # Overwrite triggers + if self.trigger_id == -1: + self.trigger_id = utils.get_max_entity_id(span_terms) + 10000 + + trigger_idx = self.trigger_id + 1 + for sentence_idx, span_preds in enumerate(e_preds): + # Update gold labels + + # store gold entity index 
(a1) + a1ent_set = set() + + for span_idx, span_term in span_terms[sentence_idx].id2term.items(): + + if span_term != "O" and not span_term.startswith("TR") and span_preds[span_idx] != 255: + + # but do not replace for entity in a2 files + span_label = span_terms[sentence_idx].id2label[ + span_idx] # entity type, e.g: Gene_or_gene_product + if span_label not in self.params['a2_entities']: + # replace for entity (using gold entity) + span_preds[span_idx] = e_golds[sentence_idx][span_idx] + + # save this index to ignore prediction + a1ent_set.add(span_idx) + + for pred_idx, label_id in enumerate(span_preds): + span_term = span_terms[sentence_idx].id2term.get(pred_idx, "O") + + # if this entity in a1: skip this span + if pred_idx in a1ent_set: + continue + + remove_span = False + + # add prediction for trigger or entity a2 + if label_id > 0: + term = '' + + # check trigger + if label_id in self.params['mappings']['nn_mapping']['trTypes_Ids']: + term = "TR" + str(trigger_idx) + + # is entity + else: + etype_label = self.params['mappings']['nn_mapping']['id_tag_mapping'][label_id] + + # check this entity type in a2 or not + if etype_label in self.params['a2_entities']: + term = "T" + str(trigger_idx) + else: + remove_span = True + + if len(term) > 0: + span_terms[sentence_idx].id2term[pred_idx] = term + span_terms[sentence_idx].term2id[term] = pred_idx + trigger_idx += 1 + + # null prediction + if label_id == 0 or remove_span: + # do not write anything + span_preds[pred_idx] = 0 + + # remove this span + if span_term.startswith("T"): + del span_terms[sentence_idx].id2term[pred_idx] + del span_terms[sentence_idx].term2id[span_term] + + span_preds[span_preds == 255] = 0 + self.trigger_id = trigger_idx + + num_padding = max_span_labels * self.params["ner_label_limit"] + + e_preds = [np.pad(pred, (0, num_padding - pred.shape[0]), + 'constant', constant_values=-1) for pred in e_preds] + e_golds = [np.pad(gold, (0, num_padding - gold.shape[0]), + 'constant', constant_values=-1) for gold in e_golds] + + e_preds = torch.tensor(e_preds, device=self.device) + nn_span_labels = torch.tensor(e_golds, device=self.device) + + embeddings = [f.pad(embedding, (0, 0, 0, max_span_labels - embedding.shape[0]), + 'constant', value=0) for embedding in embeddings] + + embeddings = torch.stack(embeddings) + embeddings = embeddings.unsqueeze(dim=2).expand(-1, -1, self.params["ner_label_limit"], -1) + embeddings = embeddings.reshape(embeddings.size(0), -1, embeddings.size(-1)) + + # output for ner + ner_preds['loss'] = ner_loss + ner_preds['terms'] = span_terms + ner_preds['span_indices'] = nn_span_indices + + # For pre-train event layer + use_gold = False + if (not self.params['predict'] and self.params['skip_ner'] and self.params['skip_rel'] and self.params[ + 'use_gold_ner'] and self.params['use_gold_rel']) or (self.params['gold_eval'] or self.params['pipelines']): + use_gold = True + if use_gold: + ner_preds['nner_preds'] = e_golds + else: + ner_preds['nner_preds'] = e_preds.detach().cpu().numpy() + + return embeddings, e_preds, e_golds, nn_span_labels, sentence_emb, ner_preds + + def generate_entity_pairs_4rel(self, bert_embeds, p_span_indices, g_span_indices): + """Prepare entity pairs for relation candidates""" + + # use gold or predicted span indices + # training mode + if not self.params['predict']: + if self.training and self.params['use_gold_ner']: + use_gold = True + # train relation only + elif not self.training and self.params['skip_ner'] and self.params['rel_epoch'] >= ( + self.params['epoch'] - 1) and 
self.params['use_gold_ner']: + use_gold = True + # train event only + elif not self.training and self.params['skip_ner'] and self.params['skip_rel'] and self.params[ + 'use_gold_rel']: + use_gold = True + else: + use_gold = False + + # predict mode + else: + if self.params['gold_eval'] or self.params['pipelines']: + use_gold = True + else: + use_gold = False + + if use_gold: + span_indices = g_span_indices + else: + span_indices = p_span_indices + + # positive indices + pos_indices = (span_indices > 0).nonzero().transpose(0, 1).long() + + # entity types + e_types = torch.full((span_indices.shape[0], span_indices.shape[1]), -1, dtype=torch.int64, + device=self.device) + + # entity and trigger indices + e_indices = torch.zeros((span_indices.shape[0], span_indices.shape[1]), dtype=torch.long) + tr_indices = torch.zeros((span_indices.shape), dtype=torch.int64, device=self.device) + + # store entity indices in batch and list of triggers + batch_eids_list = defaultdict(list) + tr_list = [] + + # store entity in each batch + batch_ent_list = defaultdict(list) + + for batch_id, a1id in enumerate(pos_indices[0]): + + # index + a2id = pos_indices[1][batch_id] + + # entity type + type_a1 = self.params['mappings']['nn_mapping']['tag2type_map'][span_indices[a1id][a2id].item()] + e_types[a1id][a2id] = torch.tensor(type_a1, device=self.device) + + # masked + e_indices[a1id][a2id] = 1 + + # trigger + if type_a1 in self.params['trTypes_Ids']: + tr_indices[a1id][a2id] = 1 + tr_list.append((a1id, a2id)) + + # entity + else: + batch_ent_list[a1id.item()].append(a2id) + + batch_eids_list[a1id.item()].append(a2id) + + # prepare for entity and trigger embeddings + e_embeds = bert_embeds.clone() + tr_embeds = bert_embeds.clone() + e_embeds[e_indices == 0] = torch.zeros((bert_embeds.shape[2]), dtype=bert_embeds.dtype, device=self.device) + tr_embeds[tr_indices == 0] = torch.zeros((bert_embeds.shape[2]), dtype=bert_embeds.dtype, device=self.device) + + # indices of pairs (trigger-entity OR trigger-trigger) for relation candidates + pair_indices = [] + + if len(tr_list): + for batch_id, trig_id in tr_list: + if len(batch_eids_list[batch_id.item()]) > 1: + + # enable relation between triggers + if self.params['enable_triggers_pair']: + # get all entity ids in this batch + b_eids = batch_eids_list[batch_id.item()].copy() + + # remove this trigger to avoid self relation + b_eids.remove(trig_id.clone().detach()) + + # or only between trigger and entity + else: + # pair with only entity + b_eids = batch_ent_list[batch_id.item()].copy() + + # check empty + if len(b_eids) > 0: + # make pairs + batch_pair_idx = torch.tensor([[batch_id], [trig_id]]).repeat(1, len(b_eids)) + batch_pair_idx = torch.cat( + (batch_pair_idx, torch.tensor(b_eids).view(1, len(b_eids))), dim=0) + + # add to pairs + pair_indices.append(batch_pair_idx) + + if len(pair_indices) > 0: + pair_indices = torch.cat(pair_indices, dim=-1) + + return e_embeds, tr_embeds, e_types, tr_indices, pair_indices + + def _init_joint(self, n_epoch): + """Flags to enable using the predicted from the previous output or not""" + + # init layer output + rel_preds = None + ev_preds = None + + # enable jointly training + enable_rel = True + enable_ev = True + + # training + if not self.params['predict']: + + # pre-train ner only: unable relation and event layers + if not self.params['skip_ner'] and n_epoch <= self.params['ner_epoch']: + enable_rel = False + enable_ev = False + + # pre-train relation only: unable event layer + if not self.params['skip_rel'] and n_epoch <= 
self.params['rel_epoch']: + enable_ev = False + + # predict on pipeline mode + elif self.params['predict'] and self.params['pipelines']: + + # for ner + if self.params['pipe_flag'] == 0: + enable_rel = False + enable_ev = False + + # for relation + elif self.params['pipe_flag'] == 1: + enable_rel = True + enable_ev = False + + # for event + else: + enable_rel = False + enable_ev = True + + return enable_rel, enable_ev, rel_preds, ev_preds + + def _accumulate_loss(self, ner_preds, rel_preds, ev_preds, n_epoch): + """To calculate the total loss from the layers' loss""" + + # total loss + acc_loss = 0 + + if not self.params['predict']: + # add ner loss + if not self.params['skip_ner']: + + # add scaled loss according to the epoch range + if n_epoch <= self.params['ner_epoch_limit']: + acc_loss = ner_preds['loss'] * self.params['ner_loss_weight_main'] + else: + acc_loss = ner_preds['loss'] * self.params['ner_loss_weight_minor'] + + # add relation loss + if not self.params['skip_rel'] and rel_preds != None: + + # check non-empty + if rel_preds['valid']: + + # add scaled loss according to the epoch range + if n_epoch <= self.params['rel_epoch_limit'] and n_epoch > self.params['ner_epoch_limit']: + acc_loss += rel_preds['loss'] * self.params['rel_loss_weight_main'] + else: + acc_loss += rel_preds['loss'] * self.params['rel_loss_weight_minor'] + + # add event loss + if ev_preds != None: + + # add scaled loss according to the epoch range + if n_epoch <= self.params['rel_epoch_limit']: + acc_loss += ev_preds['loss'] * self.params['ev_loss_weight_minor'] + else: + acc_loss += ev_preds['loss'] * self.params['ev_loss_weight_main'] + + # zero + if acc_loss == 0: + acc_loss = Variable(torch.zeros(1, device=self.params['device'])) + + return acc_loss + + def forward(self, batch_input, n_epoch=0): + + """Joint model interface.""" + + # 1 - get input + nn_tokens, nn_ids, nn_token_mask, nn_attention_mask, nn_span_indices, nn_span_labels, nn_span_labels_match_rel, nn_entity_masks, nn_trigger_masks, nn_gtruth, nn_l2r, span_terms, \ + nn_truth_ev, nn_ev_idxs, ev_lbls, etypes, max_span_labels = batch_input + + # 2 - predict entity and process output + embeddings, e_preds, e_golds, nn_span_labels, sentence_emb, ner_preds = self.process_ner_output( + nn_tokens, nn_ids, + nn_token_mask, + nn_attention_mask, + nn_entity_masks, + nn_trigger_masks, + nn_span_labels, + span_terms, + max_span_labels, + nn_span_indices + ) + + # 3 - initialize joint training + enable_rel, enable_ev, rel_preds, ev_preds = self._init_joint(n_epoch) + + # 4 - joint training + if enable_rel or enable_ev: + + # 4.1 - prepare input for joint model + e_embeds, tr_embeds, e_types, tr_ids, pair_indices = self.generate_entity_pairs_4rel(bert_embeds=embeddings, + p_span_indices=e_preds, + g_span_indices=nn_span_labels) + + # check non-empty + if len(pair_indices) > 0: + + joint_input = {'preds': e_preds, 'golds': e_golds, 'embeddings': embeddings, + 'ent_embeds': e_embeds, 'tr_embeds': tr_embeds, 'tr_ids': tr_ids, + 'ent_types': e_types, 'pairs_idx': pair_indices, 'e_types': etypes.long(), + 'l2rs': nn_l2r, + 'gtruths': nn_gtruth, 'truth_evs': nn_truth_ev, 'ev_idxs': nn_ev_idxs, + 'ev_lbls': ev_lbls, + 'sentence_embeds': sentence_emb} + + # 4.2 - training relation layer + if enable_rel: + rel_preds = self.REL_layer(joint_input) + + # 4.4 - training event layer + if enable_ev: + + # get relation output + rel_preds = self.REL_layer(joint_input) + + # check non-empty relation + if rel_preds['valid']: + # call event layer + ev_preds = 
self.EV_layer(joint_input, rel_preds, n_epoch) + + # joint model loss + acc_loss = self._accumulate_loss(ner_preds, rel_preds, ev_preds, n_epoch) + + return ner_preds, rel_preds, ev_preds, acc_loss diff --git a/model/training.py b/model/training.py new file mode 100644 index 0000000..6414bf9 --- /dev/null +++ b/model/training.py @@ -0,0 +1,225 @@ +import torch +from tqdm import tqdm, trange + +import os +import pickle + +from eval.evaluation import eval +from utils import utils +from utils.utils import debug, path +from utils.utils import ( + extract_scores, + is_best_epoch, + write_best_epoch, +) + + +# try: +# from apex import amp +# except ImportError: +# pass + + +def train( + train_data_loader, + dev_data_loader, + train_data, + dev_data, + params, + model, + optimizer +): + is_params_saved = False + global_steps = 0 + + gradient_accumulation_steps = params["gradient_accumulation_steps"] + + ner_prf_dev, rel_prf_dev, ev_prf_dev = [], [], [] + + ner_prf_dev_str, ner_prf_dev_sof, rel_prf_dev_str, rel_prf_dev_sof = [], [], [], [] + + # create output directory for results + result_dir = params['result_dir'] + if not os.path.exists(result_dir): + os.makedirs(result_dir) + + if params['freeze_ner']: + for p in model.NER_layer.parameters(): + p.requires_grad = False + + if params['freeze_rel']: + for p in model.REL_layer.parameters(): + p.requires_grad = False + + if params['freeze_bert']: + for p in model.NER_layer.bert.parameters(): + p.requires_grad = False + + # Save params: + if params['save_params']: + if not is_params_saved: + saved_params_path = result_dir + params['task_name'] + '.param' + with open(saved_params_path, "wb") as f: + pickle.dump(params, f) + # is_params_saved = True + print('SAVED PARAMETERS!') + + for epoch in trange(int(params["epoch"]), desc="Epoch"): + # TRAIN loop + model.train() + tr_loss = 0 + nb_tr_steps = 0 + + print() + print( + "====================================================================================================================") + print() + debug(f"[1] Epoch: {epoch}\n") + + for step, batch in enumerate( + tqdm(train_data_loader, desc="Iteration", leave=False) + ): + + # Start training batch + tr_data_ids = batch + tensors = utils.get_tensors(tr_data_ids, train_data, params) + + ner_preds, rel_preds, ev_preds, loss = model(tensors, epoch) + + if gradient_accumulation_steps > 1: + loss /= gradient_accumulation_steps + + tr_loss += loss.item() + nb_tr_steps += 1 + + if loss != 0: + if params["fp16"]: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + + if (step + 1) % params["gradient_accumulation_steps"] == 0: + + optimizer.step() + optimizer.zero_grad() + global_steps += 1 + + # Clear GPU unused RAM: + if params['gpu'] >= 0: + torch.cuda.empty_cache() + + print() + debug(f"[2] Train loss: {tr_loss / nb_tr_steps}\n") + debug(f"[3] Global steps: {global_steps}\n") + + print( + "+" * 10 + "RUN EVALUATION" + "+" * 10 + ) + ner_score, is_eval_rel, tr_scores, scores, ev_scores = eval( + model=model, + eval_dir=params['dev_data'], + result_dir=result_dir, + eval_dataloader=dev_data_loader, + eval_data=dev_data, + params=params, + epoch=epoch + ) + + ner_prf_dev.append( + [ + float("{0:.2f}".format(ner_score[-1][1])), + float("{0:.2f}".format(ner_score[-1][2])), + float("{0:.2f}".format(ner_score[-1][3])), + ] + ) + ner_prf_dev_str.append( + [ + float("{0:.2f}".format(scores['NER']['micro']['st_p'])), + float("{0:.2f}".format(scores['NER']['micro']['st_r'])), + 
float("{0:.2f}".format(scores['NER']['micro']['st_f'])), + ] + ) + ner_prf_dev_sof.append( + [ + float("{0:.2f}".format(scores['NER']['micro']['so_p'])), + float("{0:.2f}".format(scores['NER']['micro']['so_r'])), + float("{0:.2f}".format(scores['NER']['micro']['so_f'])), + ] + ) + extract_scores('DEV NER', ner_prf_dev) + ner_max_scores = extract_scores('n2c2 ner strict (micro)', ner_prf_dev_str) + extract_scores('n2c2 ner soft (micro)', ner_prf_dev_sof) + + if is_eval_rel: + rel_prf_dev.append( + [ + float("{0:.2f}".format(tr_scores["micro_p"] * 100)), + float("{0:.2f}".format(tr_scores["micro_r"] * 100)), + float("{0:.2f}".format(tr_scores["micro_f"] * 100)), + ] + ) + rel_prf_dev_str.append( + [ + float("{0:.2f}".format(scores['REL']['micro']['st_p'])), + float("{0:.2f}".format(scores['REL']['micro']['st_r'])), + float("{0:.2f}".format(scores['REL']['micro']['st_f'])), + ] + ) + rel_prf_dev_sof.append( + [ + float("{0:.2f}".format(scores['REL']['micro']['so_p'])), + float("{0:.2f}".format(scores['REL']['micro']['so_r'])), + float("{0:.2f}".format(scores['REL']['micro']['so_f'])), + ] + ) + extract_scores('DEV REL', rel_prf_dev) + rel_max_scores = extract_scores('n2c2 rel strict (micro)', rel_prf_dev_str) + extract_scores('n2c2 rel soft (micro)', rel_prf_dev_sof) + else: + rel_prf_dev.append( + [ + float("{0:.2f}".format(0)), + float("{0:.2f}".format(0)), + float("{0:.2f}".format(0)), + ] + ) + rel_prf_dev_str.append( + [ + float("{0:.2f}".format(0)), + float("{0:.2f}".format(0)), + float("{0:.2f}".format(0)), + ] + ) + rel_prf_dev_sof.append( + [ + float("{0:.2f}".format(0)), + float("{0:.2f}".format(0)), + float("{0:.2f}".format(0)), + ] + ) + if len(ev_scores) > 0: + ev_prf_dev.append([ev_scores["p"], ev_scores["r"], ev_scores["f"]]) + ev_max_scores = extract_scores('DEV EV', ev_prf_dev) + best_epoch = is_best_epoch(ev_prf_dev) + if best_epoch: + write_best_epoch(result_dir) + else: + ev_prf_dev.append( + [ + float("{0:.2f}".format(0)), + float("{0:.2f}".format(0)), + float("{0:.2f}".format(0)), + ] + ) + + # Clear GPU unused RAM: + if params['gpu'] >= 0: + torch.cuda.empty_cache() + + # if params['optimize_type'] == 0: + # return ner_max_scores + # elif params['optimize_type'] == 1: + # return rel_max_scores + # else: + # return ev_max_scores diff --git a/nets/RELNet.py b/nets/RELNet.py deleted file mode 100644 index 2d06602..0000000 --- a/nets/RELNet.py +++ /dev/null @@ -1,147 +0,0 @@ -import torch -import torch.nn.functional as f -from torch import nn - -from eval.evalRE import calc_stats - -import math - - -def gelu(x): - return 0.5 * x * (1 + torch.tanh(math.sqrt(math.pi / 2) * (x + 0.044715 * x ** 3))) - - -class RELModel(nn.Module): - - def __init__(self, params, sizes): - super(RELModel, self).__init__() - - self.type_embed = nn.Embedding(num_embeddings=sizes['etype_size'] + 1, - embedding_dim=params['etype_dim'], - padding_idx=sizes['etype_size']) - - ent_dim = params['bert_dim'] * 3 + params['etype_dim'] - - - self.hidden_layer1 = nn.Linear(in_features=2 * ent_dim + params['bert_dim'], - out_features=params['hidden_dim'], bias=False) - - self.hidden_layer2 = nn.Linear(in_features=params['hidden_dim'], - out_features=params['rel_reduced_size'], bias=False) - - self.l_class = nn.Linear(in_features=params['rel_reduced_size'], - out_features=sizes['rel_size']) - - self.device = params['device'] - self.params = params - self.sizes = sizes - - def embedding_layer(self, bert_out, ents_etype_): - - self.b, self.w, _ = bert_out.shape - self.e = ents_etype_.shape[1] - - 
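An aside on the `gelu` defined at the top of this (now removed) file and carried over to `utils/utils.py` by a later hunk: below is a standalone sanity check, not project code, comparing the exact erf-based GELU with the widely used tanh approximation. Note the approximation's usual constant is `sqrt(2/pi) ≈ 0.798`, whereas the definitions in this patch use `math.sqrt(math.pi / 2) ≈ 1.253`, which may be worth double-checking against the intended formula.

```python
# Standalone check: exact GELU, x * Phi(x), versus the tanh approximation.
import math
import torch

def gelu_exact(x):
    return 0.5 * x * (1.0 + torch.erf(x / math.sqrt(2.0)))

def gelu_tanh(x):
    # Common approximation (Hendrycks & Gimpel), constant sqrt(2/pi).
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi)
                                       * (x + 0.044715 * x ** 3)))

x = torch.linspace(-4.0, 4.0, steps=201)
print((gelu_exact(x) - gelu_tanh(x)).abs().max())  # small, below ~1e-3
```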
ents_etype_[ents_etype_ == -1] = self.sizes['etype_size'] - type_embeds = self.type_embed(ents_etype_) # (B, E, 10). - - return type_embeds - - def pair_representation(self, ent_embeds, tr_ids, type_embeds): - - pairs4class = torch.cat((ent_embeds, type_embeds), dim=2) - - enttoks_type_embeds = pairs4class.clone() - - return pairs4class, enttoks_type_embeds - - - def get_pairs(self, pairs4class, pair_context, pairs_idx, direction, use_gold, use_context): - indices = pairs_idx - - if direction == 'lr': - if use_context: - return torch.cat( - (pairs4class[(indices[0], indices[1])], pairs4class[(indices[0], indices[2])], pair_context), - dim=-1) - else: - return torch.cat((pairs4class[(indices[0], indices[1])], pairs4class[(indices[0], indices[2])]), dim=-1) - else: - if use_context: - return torch.cat( - (pairs4class[(indices[0], indices[2])], pairs4class[(indices[0], indices[1])], pair_context), - dim=-1) - else: - return torch.cat((pairs4class[(indices[0], indices[2])], pairs4class[(indices[0], indices[1])]), dim=-1) - - def classification(self, pairs4class, pairs_idx_, sent_embeds): - - - if self.params['predict']: - - pair_context = sent_embeds[pairs_idx_[0]] - - l2r_pairs = self.get_pairs(pairs4class, pair_context, pairs_idx_, 'lr', False, - self.params['use_context']) - - l2r_pairs = gelu(self.hidden_layer1(l2r_pairs)) - l2r_pairs = gelu(self.hidden_layer2(l2r_pairs)) - - pairs_preds_l2r = self.l_class(l2r_pairs) # (B*r, N) - - - if self.params['direction'] != 'l2r': - - if self.params['predict']: - pair_context = sent_embeds[pairs_idx_[0]] - r2l_pairs = self.get_pairs(pairs4class, pair_context, pairs_idx_, 'rl', False, - self.params['use_context']) - - - r2l_pairs = gelu(self.hidden_layer1(r2l_pairs)) - r2l_pairs = gelu(self.hidden_layer2(r2l_pairs)) - - pairs_preds_r2l = self.l_class(r2l_pairs) - - - - return pairs_preds_l2r, pairs_preds_r2l, l2r_pairs, r2l_pairs, pairs4class, pairs_idx_ - else: - return pairs_preds_l2r, pairs4class, pairs_idx_ - - def calculate(self, batch_input): - type_embeds = self.embedding_layer(batch_input['embeddings'], batch_input['ent_types']) - - sent_embeds = batch_input['sentence_embeds'] - - pairs4class, enttoks_type_embeds = self.pair_representation( - ent_embeds=batch_input['ent_embeds'], tr_ids=batch_input['tr_ids'], - type_embeds=type_embeds) - - pairs4class = pairs4class.view(self.b, self.e, pairs4class.shape[2]) - - forw_comp_res = self.classification(pairs4class=pairs4class, - pairs_idx_=batch_input['pairs_idx'], - sent_embeds=sent_embeds) - - return forw_comp_res, enttoks_type_embeds - - def forward(self, batch_input): - if len(batch_input['pairs_idx']) > 0: - fcomp_res, enttoks_type_embeds = self.calculate(batch_input) - - if self.params['direction'] != 'lr2': - preds_l2r, preds_r2l, l2r_pairs, r2l_pairs, pair4class, pairs_idx = fcomp_res - preds = (f.softmax(preds_l2r, dim=1).data, f.softmax(preds_r2l, dim=1).data) - else: - preds_l2r, l2r_pairs, pair4class, pairs_idx, positive_indices = fcomp_res - preds = f.softmax(preds_l2r, dim=1).data - - new_preds = calc_stats(preds,self.params) - - return {'next': True, - 'preds': new_preds, 'enttoks_type_embeds': enttoks_type_embeds, - 'pairs_idx': pairs_idx, 'rel_embeds': l2r_pairs, - 'pair4class': pair4class} - - else: - return {'next': False} diff --git a/nets/deepEM.py b/nets/deepEM.py deleted file mode 100644 index 33293e2..0000000 --- a/nets/deepEM.py +++ /dev/null @@ -1,298 +0,0 @@ -from collections import defaultdict - -import numpy as np -import torch -import torch.nn.functional as f -from 
torch import nn - -from nets import EVNet -from nets import RELNet -from nets.NERNet import NestedNERModel -from utils import utils - -cpu_device = torch.device("cpu") - - -class DeepEM(nn.Module): - """ - Network architecture - """ - - def __init__(self, params): - super(DeepEM, self).__init__() - - sizes = params['voc_sizes'] - device = params['device'] - - self.NER_layer = NestedNERModel.from_pretrained(params['bert_model'], params=params) - self.REL_layer = RELNet.RELModel(params, sizes) - self.EV_layer = EVNet.EVModel(params, sizes) - - self.trigger_id = -1 - - if params['train']: - self.beta = 1 - else: - self.beta = params['beta'] - - self.device = device - self.params = params - - def is_tr(self, label): - nn_tr_types_ids = self.params['mappings']['nn_mapping']['trTypes_Ids'] - return label in nn_tr_types_ids - - def generate_entity_pairs_4rel(self, bert_out, preds): - - lbls = preds - - labeled_spans = (lbls > 0).nonzero().transpose(0, 1).long() - - ent_types = torch.full((lbls.shape[0], lbls.shape[1]), -1, dtype=torch.int64, device=self.device) - - e_ids = torch.zeros((lbls.shape[0], lbls.shape[1]), dtype=torch.long) - tr_ids = torch.zeros((lbls.shape), dtype=torch.int64, device=self.device) - - batch_eids_list = defaultdict(list) - trig_list = [] - - # store only entity in each batch - batch_ent_list = defaultdict(list) - - for idx, i in enumerate(labeled_spans[0]): - j = labeled_spans[1][idx] - type_a1 = self.params['mappings']['nn_mapping']['tag2type_map'][lbls[i][j].item()] - ent_types[i][j] = torch.tensor(type_a1, device=self.device) - e_ids[i][j] = 1 - - if type_a1 in self.params['trTypes_Ids']: - tr_ids[i][j] = 1 - trig_list.append((i, j)) - else: - batch_ent_list[i.item()].append(j) - - batch_eids_list[i.item()].append(j) - - ent_embeds = bert_out.clone() - tr_embeds = bert_out.clone() - ent_embeds[e_ids == 0] = torch.zeros((bert_out.shape[2]), dtype=bert_out.dtype, device=self.device) - tr_embeds[tr_ids == 0] = torch.zeros((bert_out.shape[2]), dtype=bert_out.dtype, device=self.device) - - pairs_idx = [] - - if len(trig_list): - for batch_id, trig_id in trig_list: - if len(batch_eids_list[batch_id.item()]) > 1: - - # enable relation between triggers - if self.params['enable_triggers_pair']: - # get all entity ids in this batch - b_eids = batch_eids_list[batch_id.item()].copy() - - # remove this trigger to avoid self relation - b_eids.remove(trig_id.clone().detach()) - - # or only between trigger and entity - else: - # pair with only entity - b_eids = batch_ent_list[batch_id.item()].copy() - - # check empty - if len(b_eids) > 0: - # make pairs - batch_pair_idx = torch.tensor([[batch_id], [trig_id]]).repeat(1, len(b_eids)) - batch_pair_idx = torch.cat( - (batch_pair_idx, torch.tensor(b_eids).view(1, len(b_eids))), dim=0) - - # add to pairs - pairs_idx.append(batch_pair_idx) - - if len(pairs_idx) > 0: - pairs_idx = torch.cat(pairs_idx, dim=-1) - - return ent_embeds, tr_embeds, ent_types, tr_ids, pairs_idx - - def calculate(self, batch_input): - - # for output - ner_out = {} - - # input - nn_tokens, nn_ids, nn_token_mask, nn_attention_mask, nn_span_indices, nn_span_labels, nn_span_labels_match_rel, nn_entity_masks, nn_trigger_masks, span_terms, \ - etypes, max_span_labels = batch_input - - # predict entity - e_preds, e_golds, sentence_sections, span_masks, embeddings, sentence_emb, trigger_indices = self.NER_layer( - all_tokens=nn_tokens, - all_ids=nn_ids, - all_token_masks=nn_token_mask, - all_attention_masks=nn_attention_mask, - all_entity_masks=nn_entity_masks, - 
all_trigger_masks=nn_trigger_masks, - all_span_labels=nn_span_labels, - ) - - # run on CPU - sentence_sections = sentence_sections.detach().cpu().numpy()[:-1] - all_span_masks = span_masks.detach() > 0 - - # Embedding of each span - embeddings = torch.split(embeddings, torch.sum(all_span_masks, dim=-1).tolist()) - - # Pred of each span - e_preds = np.split(e_preds.astype(int), sentence_sections) - e_preds = [pred.flatten() for pred in e_preds] - ner_out['preds'] = e_preds - - e_golds = np.split(e_golds.astype(int), sentence_sections) - e_golds = [gold.flatten() for gold in e_golds] - - # predict both entity and trigger - if self.params["ner_predict_all"]: - for items in span_terms: - items.term2id.clear() - items.id2term.clear() - - # Overwrite triggers - if self.trigger_id == -1: - self.trigger_id = utils.get_max_entity_id(span_terms) + 10000 - - trigger_idx = self.trigger_id + 1 - for sentence_idx, span_preds in enumerate(e_preds): - for pred_idx, label_id in enumerate(span_preds): - if label_id > 0: - term = "T" + str(trigger_idx) - - # check trigger - if label_id in self.params['mappings']['nn_mapping']['trTypes_Ids']: - term = "TR" + str(trigger_idx) - - span_terms[sentence_idx].id2term[pred_idx] = term - span_terms[sentence_idx].term2id[term] = pred_idx - trigger_idx += 1 - - self.trigger_id = trigger_idx - - # given gold entity, predict trigger only - else: - # Overwrite triggers - if self.trigger_id == -1: - self.trigger_id = utils.get_max_entity_id(span_terms) + 10000 - - trigger_idx = self.trigger_id + 1 - for sentence_idx, span_preds in enumerate(e_preds): - - # store gold entity index (a1) - a1ent_set = set() - - for span_idx, span_term in span_terms[sentence_idx].id2term.items(): - - # replace for entity (using gold entity label) - if span_term != "O" and not span_term.startswith("TR") and span_preds[span_idx] != 255: - - # but do not replace for entity in a2 files - span_label = span_terms[sentence_idx].id2label[span_idx] - if span_label not in self.params['a2_entities']: - span_preds[span_idx] = e_golds[sentence_idx][span_idx] - - # save this index to ignore prediction - a1ent_set.add(span_idx) - - for pred_idx, label_id in enumerate(span_preds): - span_term = span_terms[sentence_idx].id2term.get(pred_idx, "O") - - # if this entity in a1: skip this span - if pred_idx in a1ent_set: - continue - - remove_span = False - - # add prediction for trigger or entity a2 - if label_id > 0: - - term = '' - - # is trigger - if self.is_tr(label_id): - term = "TR" + str(trigger_idx) - - # is entity - else: - etype_label = self.params['mappings']['nn_mapping']['id_tag_mapping'][label_id] - - # check this entity type in a2 or not - if etype_label in self.params['a2_entities']: - term = "T" + str(trigger_idx) - else: - remove_span = True - - if len(term) > 0: - span_terms[sentence_idx].id2term[pred_idx] = term - span_terms[sentence_idx].term2id[term] = pred_idx - trigger_idx += 1 - - # null prediction - if label_id == 0 or remove_span: - - # do not write anything - span_preds[pred_idx] = 0 - - # remove this span - if span_term.startswith("T"): - del span_terms[sentence_idx].id2term[pred_idx] - del span_terms[sentence_idx].term2id[span_term] - - span_preds[span_preds == 255] = 0 - self.trigger_id = trigger_idx - - num_padding = max_span_labels * self.params["ner_label_limit"] - - e_preds = [np.pad(pred, (0, num_padding - pred.shape[0]), - 'constant', constant_values=-1) for pred in e_preds] - e_golds = [np.pad(gold, (0, num_padding - gold.shape[0]), - 'constant', constant_values=-1) for gold 
in e_golds] - - e_preds = torch.tensor(e_preds, device=self.device) - - embeddings = [f.pad(embedding, (0, 0, 0, max_span_labels - embedding.shape[0]), - 'constant', value=0) for embedding in embeddings] - - embeddings = torch.stack(embeddings) - embeddings = embeddings.unsqueeze(dim=2).expand(-1, -1, self.params["ner_label_limit"], -1) - embeddings = embeddings.reshape(embeddings.size(0), -1, embeddings.size(-1)) - - ent_embeds, tr_embeds, ent_types, tr_ids, pairs_idx = self.generate_entity_pairs_4rel( - embeddings, - preds=e_preds - ) - ner_preds = {'preds': e_preds, 'golds': e_golds, 'embeddings': embeddings, - 'ent_embeds': ent_embeds, 'tr_embeds': tr_embeds, 'tr_ids': tr_ids, - 'ent_types': ent_types, 'pairs_idx': pairs_idx, 'e_types': etypes.long(), - 'sentence_embeds': sentence_emb} - - rel_preds = self.REL_layer(ner_preds) - if rel_preds['next']: - - ev_preds, empty_pred = self.EV_layer(ner_preds, rel_preds) - - if empty_pred == True: - ev_preds = None - - - else: - rel_preds = None - ev_preds = None - - ner_out['terms'] = span_terms - ner_out['span_indices'] = nn_span_indices - - nner_preds = e_preds.detach().cpu().numpy() - ner_out['nner_preds'] = nner_preds - - return ner_out, rel_preds, ev_preds - - def forward(self, batch_input, parameters): - - ner_preds, rel_preds, ev_preds = self.calculate(batch_input) - - return ner_preds, rel_preds, ev_preds diff --git a/train.py b/train.py new file mode 100644 index 0000000..7163e15 --- /dev/null +++ b/train.py @@ -0,0 +1,215 @@ +import os +import random +import time + +import numpy as np +import torch +from torch.utils.data import ( + DataLoader, + RandomSampler, + SequentialSampler, + TensorDataset, +) + +from model import training + +from loader.prepData import prepdata +from loader.prepNN import mapping +from loader.prepNN import prep4nn + +from bert.optimization import BertAdam +from model import deepEM +from utils import utils + + +def main(): + # check running time + t_start = time.time() + + # set config path by command line + inp_args = utils._parsing() + config_path = getattr(inp_args, 'yaml') + + # set config path manually + # config_path = 'configs/default.yaml' + + with open(config_path, 'r') as stream: + parameters = utils._ordered_load(stream) + + # print config + utils._print_config(parameters, config_path) + + parameters['learning_rate'] = float(parameters['learning_rate']) + + if parameters['gpu'] >= 0: + device = torch.device("cuda:" + str(parameters['gpu']) if torch.cuda.is_available() else "cpu") + torch.cuda.set_device(parameters['gpu']) + else: + device = torch.device("cpu") + + print('device', device) + + parameters['device'] = device + + # Fix seed for reproducibility + os.environ["PYTHONHASHSEED"] = str(parameters['seed']) + random.seed(parameters['seed']) + np.random.seed(parameters['seed']) + torch.manual_seed(parameters['seed']) + + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + # Init needed params + parameters['max_ev_per_batch'] = 0 + parameters['max_ev_per_layer'] = 0 + parameters['max_rel_per_ev'] = 0 + parameters['max_ev_per_tr'] = 0 + + # Force predict = False + parameters['predict'] = False + + # 1. 
process data + train_data = prepdata.prep_input_data(parameters['train_data'], parameters) + dev_data = prepdata.prep_input_data(parameters['dev_data'], parameters) + + # fix bug for mlee + test_data = prepdata.prep_input_data(parameters['test_data'], parameters) + + # mapping + parameters = mapping.generate_map(train_data, dev_data, test_data, parameters) # add test data for mlee + if len(parameters['mappings']['rel_map']) > 0: + parameters = mapping.find_ignore_label(parameters) + + # nner: + parameters['mappings']['nn_mapping'] = utils.gen_nn_mapping(parameters['mappings']['tag_map'], + parameters['mappings']['tag2type_map'], + parameters['trTypes_Ids']) + + train, train_events_map = prep4nn.data2network(train_data, 'train', parameters) + dev, dev_events_map = prep4nn.data2network(dev_data, 'demo', parameters) + + if len(train) == 0: + raise ValueError("Train set empty.") + if len(dev) == 0: + raise ValueError("Test set empty.") + + # For ranking REL labels weight + parameters['statistics'] = {'rel': np.zeros(parameters['voc_sizes']['rel_size'])} + + train_data = prep4nn.torch_data_2_network(cdata2network=train, events_map=train_events_map, params=parameters, + do_get_nn_data=True) + dev_data = prep4nn.torch_data_2_network(cdata2network=dev, events_map=dev_events_map, params=parameters, + do_get_nn_data=True) + + trn_data_size = len(train_data['nn_data']['ids']) + dev_data_size = len(dev_data['nn_data']['ids']) + + train_data_ids = TensorDataset(torch.arange(trn_data_size)) + dev_data_ids = TensorDataset(torch.arange(dev_data_size)) + train_sampler = RandomSampler(train_data_ids) + train_dataloader = DataLoader(train_data_ids, sampler=train_sampler, batch_size=parameters['batchsize']) + dev_sampler = SequentialSampler(dev_data_ids) + dev_dataloader = DataLoader(dev_data_ids, sampler=dev_sampler, batch_size=parameters['batchsize']) + + # 2. model + model = deepEM.DeepEM(parameters) + + # Continue training joint model + if not parameters['predict']: + # Load pre-trained models + if 'joint_model_dir' in parameters: + print('Continue training joint model from', parameters['joint_model_dir']) + utils.handle_checkpoints(model=model, + checkpoint_dir=parameters['joint_model_dir'], + params={ + 'device': device + }, + resume=True) + if 'ner_model_dir' in parameters: + print('pre-load NER model from', parameters['ner_model_dir']) + utils.handle_checkpoints(model=model.NER_layer, + checkpoint_dir=parameters['ner_model_dir'], + params={ + 'device': device + }, + resume=True) + + if 'rel_model_dir' in parameters: + print('pre-load REL model from', parameters['rel_model_dir']) + utils.handle_checkpoints(model=model.REL_layer, + checkpoint_dir=parameters['rel_model_dir'], + params={ + 'device': device + }, + resume=True) + + if 'ev_model_dir' in parameters: + print('pre-load EV model from', parameters['ev_model_dir']) + utils.handle_checkpoints(model=model.EV_layer, + checkpoint_dir=parameters['ev_model_dir'], + params={ + 'device': device + }, + resume=True) + + # 3. optimizer + assert ( + parameters['gradient_accumulation_steps'] >= 1 + ), "Invalid gradient_accumulation_steps parameter, should be >= 1." 
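Immediately below this assertion, the script divides `batchsize` by `gradient_accumulation_steps` and derives `num_train_steps` with a ceiling division; here is a tiny standalone check of that arithmetic (illustrative numbers only, not project code).

```python
# Gradient-accumulation arithmetic: the per-forward batch shrinks, the
# effective batch per optimizer step stays the same, and steps per epoch
# come from a ceiling division over the training examples.
epochs, n_examples = 2, 1000
batchsize, grad_acc_steps = 16, 4

micro_batch = batchsize // grad_acc_steps       # seen by each forward pass
effective_batch = micro_batch * grad_acc_steps  # seen by each optimizer step
steps_per_epoch = (n_examples - 1) // effective_batch + 1
num_train_steps = epochs * steps_per_epoch

print(micro_batch, effective_batch, steps_per_epoch, num_train_steps)
# -> 4 16 63 126
```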
+ + parameters['batchsize'] //= parameters['gradient_accumulation_steps'] + + num_train_steps = parameters['epoch'] * ( + (trn_data_size - 1) // (parameters['batchsize'] * parameters['gradient_accumulation_steps']) + 1) + parameters['voc_sizes']['num_train_steps'] = num_train_steps + + model.to(device) + + # Prepare optimizer + + ner_params, rel_params, ev_params = utils.partialize_optimizer_models_parameters(model) + param_optimizers = ner_params + optimizer_grouped_parameters = utils.gen_optimizer_grouped_parameters(param_optimizers, "ner", parameters) + rel_grouped_params = utils.gen_optimizer_grouped_parameters(rel_params, "rel", parameters) + ev_grouped_params = utils.gen_optimizer_grouped_parameters(ev_params, "ev", parameters) + + if parameters['bert_warmup_lr']: + t_total = num_train_steps + else: + t_total = -1 + + optimizer = BertAdam( + optimizer_grouped_parameters, + lr=parameters['learning_rate'], + warmup=parameters['warmup_proportion'], + t_total=t_total + ) + + optimizer.add_param_group(rel_grouped_params[0]) + optimizer.add_param_group(rel_grouped_params[1]) + optimizer.add_param_group(ev_grouped_params[0]) + optimizer.add_param_group(ev_grouped_params[1]) + + if parameters['train']: + # 4. training + + if parameters['fp16']: + model, optimizer = amp.initialize(model, optimizer, opt_level="O1") + + training.train(train_data_loader=train_dataloader, dev_data_loader=dev_dataloader, + train_data=train_data, dev_data=dev_data, params=parameters, model=model, + optimizer=optimizer) + + print('TRAINING: DONE!') + + # calculate running time + t_end = time.time() + print('TOTAL RUNNING TIME: {}'.format(utils._humanized_time(t_end - t_start))) + + return + + +if __name__ == '__main__': + + main() From b3f7e617760cb3360fec26e2804aa49be7bf1896 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:35:36 +0900 Subject: [PATCH 15/70] training --- README.md | 19 ++ utils/c2t_utils.py | 245 +++++++++++++++++++ utils/utils.py | 574 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 797 insertions(+), 41 deletions(-) create mode 100644 utils/c2t_utils.py diff --git a/README.md b/README.md index d9a3eff..84fc031 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,25 @@ sh run/train/generate_configs.sh cg basic sh run/train/generate_configs-debug.sh cg debug ``` +4. Training +- Pretrain layers (these need to be done before training the joint model) +```bash +sh run.sh cg basic train ner +sh run.sh cg basic train rel +sh run.sh cg basic train ev +``` + +- Train joint model: given gold entity +```bash +sh run.sh cg basic train joint gold +``` + +- Train joint end-to-end model + +```bash +sh run.sh cg basic train joint e2e +``` + # 3. Predict (BioNLP tasks) ## 3.1. 
Prepare data diff --git a/utils/c2t_utils.py b/utils/c2t_utils.py new file mode 100644 index 0000000..c0d6081 --- /dev/null +++ b/utils/c2t_utils.py @@ -0,0 +1,245 @@ +import torch + +# C2T: Using to padding +MAX_NESTED = 4 +PAIR_SIZE = 2 + +# Type padding +TYPE_TRID = 0 +TYPE_ARG1 = 1 +TYPE_ARG2 = 2 +TYPE_LBL = 4 +TYPE_TR0ID = 5 + + +def _is_contain(input, target): + """ C2T: Check and return the index if Tensor target(list) contains Tensor input + """ + for i, e in enumerate(target): + if torch.all(torch.eq(e, input)): + return i + return -1 + + +# C2T: Padding +# C2T +def _truncate(arr, max_length): + while True: + total_length = len(arr) + if total_length <= max_length: + break + else: + arr.pop() + + +def _padding(arr, max_length, padding_idx=-1): + while len(arr) < max_length: + arr.append(padding_idx) + + +def _to_tensor(arr, params): + return torch.tensor(arr, device=params['device']) + + +def _to_torch_data(arr, max_length, params, padding_idx=-1): + for e in arr: + _truncate(e, max_length) + _padding(e, max_length, padding_idx=padding_idx) + return _to_tensor(arr, params) + + +def _padding_rels(rels, max_rel_per_event): + """ C2T: Padding relations + """ + padded_rels = [] + for rel in rels: + padded_rels.append(rel) + while len(padded_rels) < max_rel_per_event: + padded_rels.append([-1] * PAIR_SIZE) + return padded_rels + + +def _padding_cell_1_value(val, cols, rows, padding_val=-1): + """ C2T: Padding cells that only have 1 value + """ + padded_cell = [] + padded_row = [] + padded_row.append(val) + while len(padded_row) < cols: + padded_row.append(padding_val) + padded_cell.append(padded_row) + while len(padded_cell) < rows: + padded_cell.append([padding_val] * cols) + return padded_cell + + +def _padding_even(even, max_rel_per_event): + max_cell = 4 + padding_val = -1 + padded_even = [] + # padding trid + trid = even[0] + padded_trid = [_padding_cell_1_value(trid[0], PAIR_SIZE, max_rel_per_event, padding_val=padding_val), + _padding_cell_1_value(trid[1], PAIR_SIZE, max_rel_per_event, padding_val=padding_val)] + while len(padded_trid) < max_cell: + padded_trid.append(_padding_cell_1_value(padding_val, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + # print('padded_trid', padded_trid) + padded_even.append(padded_trid) + # padding arg1 + arg1 = even[1] + padded_arg1 = [] + for e in arg1: + padded_arg1.append(_padding_cell_1_value(e, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + # print('padded_arg1', padded_arg1) + padded_even.append(padded_arg1) + # padding arg2 + arg2 = even[2] + padded_arg2 = [] + for e in arg2: + padded_arg2.append(_padding_cell_1_value(e, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + while len(padded_arg2) < max_cell: + padded_arg2.append(_padding_cell_1_value(padding_val, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + # print('padded_arg2', padded_arg2) + padded_even.append(padded_arg2) + # padding r + r = even[3] + padded_r = [] + for e in r: + if e != -1: + padded_r.append(_padding_rels([e], max_rel_per_event)) + else: + padded_r.append(_padding_cell_1_value(padding_val, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + while len(padded_r) < max_cell: + padded_r.append(_padding_cell_1_value(padding_val, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + padded_even.append(padded_r) + # padding label + lbl = even[4] + padded_lbl = [_padding_cell_1_value(lbl, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)] + while len(padded_lbl) < max_cell: + 
padded_lbl.append(_padding_cell_1_value(padding_val, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + padded_even.append(padded_lbl) + + # padding tr0id + if len(even) > 5: + tr0id = even[5] + padded_tr0id = [] + for e in tr0id: + padded_tr0id.append(_padding_cell_1_value(e, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + while len(padded_tr0id) < max_cell: + padded_tr0id.append( + _padding_cell_1_value(padding_val, PAIR_SIZE, max_rel_per_event, padding_val=padding_val)) + padded_even.append(padded_tr0id) + + return padded_even + + +def _padding_truth_ev(arr, max_rel_per_event, max_ev_per_layer, max_ev_per_batch): + padded_arr = [] + for row in arr: + padded_row = [] + for cell in row: + padded_cell = [] + if cell != -1: + for ev in cell: + r_e = ev[0] + e_e = ev[1] + padded_r_e = [] + for r_e_e in r_e: + while len(r_e_e) < max_rel_per_event: + r_e_e = r_e_e + [-1] + padded_r_e.append(r_e_e) + while len(padded_r_e) < max_rel_per_event: + padded_r_e.append([-1] * max_rel_per_event) + while len(e_e) < max_rel_per_event: + e_e = e_e + [-1] + padded_r_e.append(e_e) + padded_cell.append(padded_r_e) + while len(padded_cell) < max_ev_per_layer: + padded_cell.append([[-1] * max_rel_per_event] * (max_rel_per_event + 1)) + else: + padded_cell = [[[-1] * max_rel_per_event] * (max_rel_per_event + 1)] * max_ev_per_layer + padded_row.append(padded_cell) + padded_arr.append(padded_row) + while len(padded_arr) < max_ev_per_batch: + padded_arr.append([[[[-1] * max_rel_per_event] * (max_rel_per_event + 1)] * max_ev_per_layer] * MAX_NESTED) + return padded_arr + + +def _flatten_structs_type_ev(arr, max_rel_per_event, max_ev_per_tr): + padded_arr = [] + for cell in arr: + padded_cell = [] + if cell != -1: + for ev in cell: + padded_r_e = [] + for r_e_e in ev: + # while len(r_e_e) < PAIR_SIZE: + # r_e_e = r_e_e + [-1] + padded_r_e.append(r_e_e) + while len(padded_r_e) < max_rel_per_event: + padded_r_e.append([-1] * PAIR_SIZE) + # if len(padded_r_e) > 2: + padded_cell.append(padded_r_e) + while len(padded_cell) < max_ev_per_tr: + padded_cell.append([[-1] * PAIR_SIZE] * max_rel_per_event) + else: + padded_cell = [[[-1] * PAIR_SIZE] * max_rel_per_event] * max_ev_per_tr + padded_arr.append(padded_cell) + return padded_arr + + +def _padding_even_cd(even_cd, max_rel_per_event, dtype, device=torch.device("cpu")): + padded_even = [] + r_part = even_cd[0] + e_part = even_cd[1] + for r in r_part: + padded_r = [] + for r_e in r: + padded_r.append(r_e) + while len(padded_r) < max_rel_per_event: + padded_r.append(-1) + padded_even.append(padded_r) + while len(padded_even) < max_rel_per_event: + padded_even.append([-1] * max_rel_per_event) + padded_e = [] + for e in e_part: + padded_e.append(e) + while len(padded_e) < max_rel_per_event: + padded_e.append(-1) + padded_even.append(padded_e) + return torch.tensor(padded_even, dtype=dtype, device=device) + + +# C2T: Un-padding +def _unpadding_cell_1_value(padded_cell, cols, rows, device, padding_val=-1, replacing_padding=-1): + padding_cell = torch.tensor([[padding_val] * cols] * rows, device=device) + if torch.all(torch.eq(padded_cell.long(), padding_cell)): + return replacing_padding + else: + return padded_cell[0][0] + + +def _unpadding_even_element(padded_ev_e, max_rel_per_event, device, type_padding=0, replacing_padding=-1): + unpadded_ev_e = [] + padding_val = -1 + if type_padding == TYPE_TRID: + valid_idx = 1 + elif type_padding == TYPE_ARG1: + valid_idx = 3 + elif type_padding == TYPE_ARG2: + valid_idx = 2 + elif type_padding == TYPE_LBL: + valid_idx 
= 0 + elif type_padding == TYPE_TR0ID: + valid_idx = 2 + + for e in padded_ev_e: + unpadded_e = [] + for i, cell in enumerate(e): + cell = cell.to(device) + if i <= valid_idx: + unpadded_e.append( + _unpadding_cell_1_value(cell, PAIR_SIZE, max_rel_per_event, device, padding_val=padding_val, + replacing_padding=replacing_padding)) + unpadded_ev_e.append(unpadded_e) + return torch.tensor(unpadded_ev_e, device=device) diff --git a/utils/utils.py b/utils/utils.py index 63983f9..e52edd5 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -7,40 +7,24 @@ import pprint import random import re +import shutil from collections import OrderedDict from datetime import datetime from glob import glob +import math import numpy as np import torch +# C2T import yaml -logger = logging.getLogger(__name__) - - -def _to_torch_data(arr, max_length, params, padding_idx=-1): - for e in arr: - _truncate(e, max_length) - _padding(e, max_length, padding_idx=padding_idx) - return _to_tensor(arr, params) - - -def _truncate(arr, max_length): - while True: - total_length = len(arr) - if total_length <= max_length: - break - else: - arr.pop() +from utils import c2t_utils - -def _padding(arr, max_length, padding_idx=-1): - while len(arr) < max_length: - arr.append(padding_idx) +logger = logging.getLogger(__name__) -def _to_tensor(arr, params): - return torch.tensor(arr, device=params['device']) +def gelu(x): + return 0.5 * x * (1 + torch.tanh(math.sqrt(math.pi / 2) * (x + 0.044715 * x ** 3))) def path(*paths): @@ -49,7 +33,7 @@ def path(*paths): def make_dirs(*paths): os.makedirs(path(*paths), exist_ok=True) - + def makedir(dir): if not os.path.exists(dir): @@ -74,6 +58,14 @@ def _parsing(): return args +def _parsing_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--yaml', type=str, required=True, help='yaml file') + parser.add_argument('--opt', type=str, required=True, help='yaml opt file') + args = parser.parse_args() + return args + + def _ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict): """ Load parameters from yaml in order @@ -89,9 +81,113 @@ def construct_mapping(loader, node): OrderedLoader.add_constructor( yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping) + + # print(dict(yaml.load(stream, OrderedLoader).items())) + return yaml.load(stream, OrderedLoader) +def _print_config(config, config_path): + """Print config in dictionary format""" + print("\n====================================================================\n") + print('RUNNING CONFIG: ', config_path) + print('TIME: ', datetime.now()) + + for key, value in config.items(): + print(key, value) + + return + + +def dicard_invalid_nes(terms, sentences): + """ + Discard incomplete tokenized entities. + """ + text = ' '.join(sentences) + valid_terms = [] + count = 0 + for term in terms: + start, end = int(term[2]), int(term[3]) + if start == 0: + if text[end] == ' ': + valid_terms.append(term) + else: + count += 1 + # print('Context:{}\t{}'.format(text[start:end + 1], term)) + elif text[start - 1] == ' ' and text[end] == ' ': + valid_terms.append(term) + else: + count += 1 + # print('Context:{}\t{}'.format(text[start-1:end+1], term)) + return valid_terms, count + + +def _humanized_time(second): + """ + Returns a human readable time. 
+ """ + m, s = divmod(second, 60) + h, m = divmod(m, 60) + return "%dh %02dm %02ds" % (h, m, s) + + +def is_best_epoch(prf_): + fs = [] + for epoch, (p, r, f) in enumerate(prf_): + fs.append(f) + + if len(fs) == 1: + return True + + elif max(fs[:-1]) < fs[-1]: + return True + + else: + return False + + +def extract_scores(task, prf_): + ps = [] + rs = [] + fs = [] + for epoch, (p, r, f) in enumerate(prf_): + ps.append(p) + rs.append(r) + fs.append(f) + + maxp = max(ps) + maxr = max(rs) + maxf = max(fs) + + maxp_index = ps.index(maxp) + maxr_index = rs.index(maxr) + maxf_index = fs.index(maxf) + + print('TASK: ', task) + print('precision: ', ps) + print('recall: ', rs) + print('fscore: ', fs) + print('best precision/recall/fscore [epoch]: ', maxp, ' [', maxp_index, ']', '\t', maxr, ' [', maxr_index, ']', + '\t', maxf, ' [', maxf_index, ']') + print() + + return (maxp, maxr, maxf) + + +def write_best_epoch(result_dir): + # best_dir = params['ev_setting'] + params['ev_eval_best'] + best_dir = result_dir + 'ev-best/' + + if os.path.exists(best_dir): + os.system('rm -rf ' + best_dir) + # else: + # os.makedirs(best_dir) + + current_dir = result_dir + 'ev-last/' + + shutil.copytree(current_dir, best_dir) + + def dumps(obj): if isinstance(obj, dict): return json.dumps(obj, indent=4, ensure_ascii=False) @@ -100,6 +196,10 @@ def dumps(obj): return obj +def debug(*args, **kwargs): + print(*map(dumps, args), **kwargs) + + def get_max_entity_id(span_terms): max_id = 0 for items in span_terms: @@ -110,8 +210,107 @@ def get_max_entity_id(span_terms): return max_id +def gen_nn_mapping(tag2id_mapping, tag2type_map, trTypes_Ids): + nn_tr_types_ids = [] + nn_tag_2_type = {} + tag_names = [] + for tag, _id in tag2id_mapping.items(): + if tag.startswith("I-"): + continue + tag_names.append(re.sub("^B-", "", tag)) + if tag2type_map[_id] in trTypes_Ids: + nn_tr_types_ids.append(len(tag_names) - 1) + + nn_tag_2_type[len(tag_names) - 1] = tag2type_map[_id] + + id_tag_mapping = {k: v for k, v in enumerate(tag_names)} + tag_id_mapping = {v: k for k, v in id_tag_mapping.items()} + + # For multi-label nner + assert all(_id == tr_id for _id, tr_id in + zip(sorted(id_tag_mapping)[1:], nn_tr_types_ids)), "Trigger IDS must be continuous and on the left side" + return {'id_tag_mapping': id_tag_mapping, 'tag_id_mapping': tag_id_mapping, 'trTypes_Ids': nn_tr_types_ids, + 'tag2type_map': nn_tag_2_type} + + +def padding_samples_lstm(tokens_, ids_, token_mask_, attention_mask_, span_indices_, span_labels_, + span_labels_match_rel_, + entity_masks_, trigger_masks_, gtruth_, l2r_, ev_idxs_, params): + # count max lengths: + max_seq = 0 + for ids in ids_: + max_seq = max(max_seq, len(ids)) + + max_span_labels = 0 + for span_labels in span_labels_: + max_span_labels = max(max_span_labels, len(span_labels)) + + for idx, ( + tokens, ids, token_mask, attention_mask, span_indices, span_labels, span_labels_match_rel, entity_masks, + trigger_masks, gtruth, l2r, ev_idxs) in enumerate( + zip(tokens_, + ids_, + token_mask_, + attention_mask_, + span_indices_, + span_labels_, + span_labels_match_rel_, + entity_masks_, + trigger_masks_, + gtruth_, + l2r_, + ev_idxs_)): + padding_size = max_seq - len(ids) + + tokens += [""] * padding_size + + # Zero-pad up to the sequence length + ids += [0] * padding_size + token_mask += [0] * padding_size + attention_mask += [0] * padding_size + + # Padding for gtruth and l2r + # gtruth = np.pad(gtruth, ( + # (0, max_span_labels - len(span_indices)), (0, max_span_labels - len(span_indices))), + # 'constant', 
constant_values=-1) + + # l2r = np.pad(l2r, + # ((0, max_span_labels - len(span_indices)), + # (0, max_span_labels - len(span_indices))), + # 'constant', constant_values=-1) + + # Padding for span indices and labels + num_padding_spans = max_span_labels - len(span_labels) + + span_indices += [(-1, -1)] * (num_padding_spans * params["ner_label_limit"]) + span_labels += [np.zeros(params["mappings"]["nn_mapping"]["num_labels"])] * num_padding_spans + span_labels_match_rel += [-1] * num_padding_spans + entity_masks += [-1] * num_padding_spans + trigger_masks += [-1] * num_padding_spans + + # ev_idxs = np.pad(ev_idxs, (0, params['max_span_labels'] - len(ev_idxs)), 'constant', constant_values=-1) + # ev_idxs = np.array(ev_idxs) + + gtruth_[idx] = gtruth + l2r_[idx] = l2r + ev_idxs_[idx] = ev_idxs + + assert len(ids) == max_seq + assert len(token_mask) == max_seq + assert len(attention_mask) == max_seq + assert len(span_indices) == max_span_labels * params["ner_label_limit"] + assert len(span_labels) == max_span_labels + assert len(span_labels_match_rel) == max_span_labels + assert len(entity_masks) == max_span_labels + assert len(trigger_masks) == max_span_labels + # assert len(gtruth_[idx][0]) == max_span_labels + # assert len(l2r_[idx][0]) == max_span_labels + + return max_span_labels + + def padding_samples(ids_, token_mask_, attention_mask_, span_indices_, span_labels_, span_labels_match_rel_, - entity_masks_, trigger_masks_, params): + entity_masks_, trigger_masks_, gtruth_, l2r_, ev_idxs_, params): # count max lengths: max_seq = 0 for ids in ids_: @@ -123,7 +322,7 @@ def padding_samples(ids_, token_mask_, attention_mask_, span_indices_, span_labe for idx, ( ids, token_mask, attention_mask, span_indices, span_labels, span_labels_match_rel, entity_masks, - trigger_masks) in enumerate( + trigger_masks, gtruth, l2r, ev_idxs) in enumerate( zip( ids_, token_mask_, @@ -133,7 +332,9 @@ def padding_samples(ids_, token_mask_, attention_mask_, span_indices_, span_labe span_labels_match_rel_, entity_masks_, trigger_masks_, - )): + gtruth_, + l2r_, + ev_idxs_)): padding_size = max_seq - len(ids) # Zero-pad up to the sequence length @@ -141,6 +342,16 @@ def padding_samples(ids_, token_mask_, attention_mask_, span_indices_, span_labe token_mask += [0] * padding_size attention_mask += [0] * padding_size + # Padding for gtruth and l2r + # gtruth = np.pad(gtruth, ( + # (0, max_span_labels - len(span_indices)), (0, max_span_labels - len(span_indices))), + # 'constant', constant_values=-1) + + # l2r = np.pad(l2r, + # ((0, max_span_labels - len(span_indices)), + # (0, max_span_labels - len(span_indices))), + # 'constant', constant_values=-1) + # Padding for span indices and labels num_padding_spans = max_span_labels - len(span_labels) @@ -150,6 +361,13 @@ def padding_samples(ids_, token_mask_, attention_mask_, span_indices_, span_labe entity_masks += [-1] * num_padding_spans trigger_masks += [-1] * num_padding_spans + # ev_idxs = np.pad(ev_idxs, (0, params['max_span_labels'] - len(ev_idxs)), 'constant', constant_values=-1) + # ev_idxs = np.array(ev_idxs) + + gtruth_[idx] = gtruth + l2r_[idx] = l2r + ev_idxs_[idx] = ev_idxs + assert len(ids) == max_seq assert len(token_mask) == max_seq assert len(attention_mask) == max_seq @@ -158,12 +376,86 @@ def padding_samples(ids_, token_mask_, attention_mask_, span_indices_, span_labe assert len(span_labels_match_rel) == max_span_labels assert len(entity_masks) == max_span_labels assert len(trigger_masks) == max_span_labels + # assert len(gtruth_[idx][0]) == 
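
> Editor's note: both padding routines above implement the same scheme: grow every per-sentence list to the batch maximum so the batch can be stacked into rectangular tensors. A minimal, self-contained sketch of that scheme with toy values (the variable names and pad values below mirror the code, but this is not the project's actual API):

```python
# Toy sketch of the padding scheme: right-pad token ids with 0 and span
# masks with -1 up to the batch maxima, as padding_samples does above.
ids_ = [[101, 7, 102], [101, 8, 9, 10, 102]]
entity_masks_ = [[1, 0], [1, 1, 0]]

max_seq = max(len(ids) for ids in ids_)
max_spans = max(len(m) for m in entity_masks_)

for ids, masks in zip(ids_, entity_masks_):
    ids += [0] * (max_seq - len(ids))          # 0 marks padded subwords
    masks += [-1] * (max_spans - len(masks))   # -1 marks padded spans

assert all(len(ids) == max_seq for ids in ids_)
assert all(len(m) == max_spans for m in entity_masks_)
```

The in-place `+=` matters: like the functions above, the sketch mutates the per-sentence lists so the caller's batch structures are padded without being rebuilt.
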
max_span_labels + # assert len(l2r_[idx][0]) == max_span_labels return max_span_labels +def partialize_optimizer_models_parameters(model): + """ + Partialize entity, relation and event models parameters from optimizer's parameters + """ + ner_params = list(model.NER_layer.named_parameters()) + rel_params = list(model.REL_layer.named_parameters()) + ev_params = list(model.EV_layer.named_parameters()) + + return ner_params, rel_params, ev_params + + +def gen_optimizer_grouped_parameters(param_optimizers, name, params): + no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] + if not params['bert_warmup_lr']: + lr = float(params['ner_lr']) + if name == 'rel': + lr = float(params['rel_lr']) + if name == 'ev': + lr = float(params['ev_lr']) + else: + lr = params['learning_rate'] + + optimizer_grouped_parameters = [ + { + "name": name, + "params": [ + p + for n, p in param_optimizers + if not any(nd in n for nd in no_decay) + ], + "weight_decay": 0.01, + "lr": lr + }, + { + "name": name, + "params": [ + p + for n, p in param_optimizers + if any(nd in n for nd in no_decay) + ], + "weight_decay": 0.0, + "lr": lr + }, + ] + + return optimizer_grouped_parameters + + +def prepare_optimizer_parameters(optimizer, rel_params, ev_params, conf_params, epoch): + if not conf_params['skip_ner']: + if epoch == conf_params['ner_epoch'] + 1: + print("Adding optimizer's REL model params") + rel_grouped_params = gen_optimizer_grouped_parameters(rel_params, "rel", conf_params) + optimizer.add_param_group(rel_grouped_params[0]) + optimizer.add_param_group(rel_grouped_params[1]) + if not conf_params['skip_rel']: + if epoch == conf_params['rel_epoch'] + 1: + print("Adding optimizer's EV model params") + ev_grouped_params = gen_optimizer_grouped_parameters(ev_params, "ev", conf_params) + optimizer.add_param_group(ev_grouped_params[0]) + optimizer.add_param_group(ev_grouped_params[1]) + else: + pass + + def get_tensors(data_ids, data, params): - tokens = [] + # for lstm + if params['use_lstm']: + tokens = [ + data["nn_data"]["tokens"][tr_data_id] + for tr_data_id in data_ids[0].tolist() + ] + else: + tokens = [] ids = [ data["nn_data"]["ids"][tr_data_id] @@ -198,12 +490,32 @@ def get_tensors(data_ids, data, params): data["nn_data"]["trigger_masks"][tr_data_id] for tr_data_id in data_ids[0].tolist() ] + gtruths = [ + data["nn_data"]["gtruth"][tr_data_id] + for tr_data_id in data_ids[0].tolist() + ] + l2rs = [ + data["nn_data"]["l2r"][tr_data_id] + for tr_data_id in data_ids[0].tolist() + ] span_terms = [ data["nn_data"]["span_terms"][tr_data_id] for tr_data_id in data_ids[0].tolist() ] + truth_evs = [ + data["nn_data"]["truth_ev"][tr_data_id] + for tr_data_id in data_ids[0].tolist() + ] + ev_idxs = [ + data["nn_data"]["ev_idxs"][tr_data_id] + for tr_data_id in data_ids[0].tolist() + ] + ev_lbls = [ + data["nn_data"]["ev_lbls"][tr_data_id] + for tr_data_id in data_ids[0].tolist() + ] etypes = [data["etypes"][tr_data_id] for tr_data_id in data_ids[0].tolist()] tokens = copy.deepcopy(tokens) @@ -215,22 +527,50 @@ def get_tensors(data_ids, data, params): span_labels_match_rel = copy.deepcopy(span_labels_match_rel) entity_masks = copy.deepcopy(entity_masks) trigger_masks = copy.deepcopy(trigger_masks) + gtruths = copy.deepcopy(gtruths) + l2rs = copy.deepcopy(l2rs) span_terms = copy.deepcopy(span_terms) + truth_evs = copy.deepcopy(truth_evs) + ev_idxs = copy.deepcopy(ev_idxs) + etypes = copy.deepcopy(etypes) + + # use lstm + if params['use_lstm']: + max_span_labels = padding_samples_lstm( + tokens, + ids, + token_masks, + 
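
> Editor's note: `gen_optimizer_grouped_parameters` and `prepare_optimizer_parameters` above stage the optimizer: it starts with the NER parameters, then gains the REL and EV groups once their pretraining epochs are reached. A runnable sketch of that pattern with dummy modules (toy sizes and learning rates, not the project's configuration):

```python
import torch
from torch import nn

ner, rel = nn.Linear(4, 4), nn.Linear(4, 4)

# start with the NER parameters only, split into decay / no-decay groups
# the same way gen_optimizer_grouped_parameters does
no_decay = ("bias", "LayerNorm.bias", "LayerNorm.weight")
groups = [
    {"params": [p for n, p in ner.named_parameters()
                if not any(nd in n for nd in no_decay)],
     "weight_decay": 0.01, "lr": 1e-3},
    {"params": [p for n, p in ner.named_parameters()
                if any(nd in n for nd in no_decay)],
     "weight_decay": 0.0, "lr": 1e-3},
]
optimizer = torch.optim.Adam(groups)

# at a later epoch, grow the optimizer with the REL parameters,
# as prepare_optimizer_parameters does via optimizer.add_param_group
optimizer.add_param_group({"params": rel.parameters(),
                           "weight_decay": 0.01, "lr": 5e-4})
assert len(optimizer.param_groups) == 3
```

Adding parameter groups lazily keeps the frozen stages out of the optimizer entirely, rather than relying on zeroed gradients or per-parameter flags.
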
attention_masks, + span_indices, + span_labels, + span_labels_match_rel, + entity_masks, + trigger_masks, + gtruths, + l2rs, + ev_idxs, + params + ) - max_span_labels = padding_samples( - ids, - token_masks, - attention_masks, - span_indices, - span_labels, - span_labels_match_rel, - entity_masks, - trigger_masks, - params - ) + # use bert + else: + max_span_labels = padding_samples( + ids, + token_masks, + attention_masks, + span_indices, + span_labels, + span_labels_match_rel, + entity_masks, + trigger_masks, + gtruths, + l2rs, + ev_idxs, + params + ) # Padding etypes - etypes = _to_torch_data(etypes, max_span_labels, params) + etypes = c2t_utils._to_torch_data(etypes, max_span_labels, params) batch_ids = torch.tensor(ids, dtype=torch.long, device=params["device"]) batch_token_masks = torch.tensor( @@ -255,6 +595,11 @@ def get_tensors(data_ids, data, params): trigger_masks, dtype=torch.int8, device=params["device"] ) + batch_gtruths = gtruths + batch_l2rs = l2rs + batch_truth_evs = truth_evs + batch_ev_idxs = ev_idxs + return ( tokens, batch_ids, @@ -265,7 +610,12 @@ def get_tensors(data_ids, data, params): batch_span_labels_match_rel, batch_entity_masks, batch_trigger_masks, - span_terms, + batch_gtruths, + batch_l2rs, + span_terms, # ! << KHOA WAS HERE + batch_truth_evs, + batch_ev_idxs, + ev_lbls, etypes, max_span_labels ) @@ -411,6 +761,9 @@ def read_lines(filename): def write_lines(lines, filename, linesep="\n"): is_first_line = True + # make_dirs(os.path.dirname(filename)) + # os.makedirs(filename) + # with open(abs_path(filename), "w", encoding="UTF-8") as f: with open(filename, "w", encoding="UTF-8") as f: for line in lines: if is_first_line: @@ -418,3 +771,142 @@ def write_lines(lines, filename, linesep="\n"): else: f.write(linesep) f.write(line) + + # fig bug that not write file with empty prediction + # if len(lines) == 0: + # print(filename) + # f.write(linesep) + + +def list_compare(left, right): + """ + Failed cases: + a = np.array([[1,2,3], [4,5,6]]) + b = np.array([[1,2,3], [4,5,6]]) + # => Expected value: True + + a = np.array([[1,2,3], [4,5,6]]) + b = np.array([[1,2,3], np.array([4,5,6])]) + # => Expected value: True + + a = [np.array([1,2,3]), np.array([4,5,6])] + b = [np.array([1,2,3]), np.array([4,5,6])] + # => Expected value: True + + a = np.array([[1,2,3], [1,2,3]]) + b = np.array([[1,2,3]]) + # => Expected value: False + """ + if isinstance(left, np.ndarray): + left = left.tolist() + + if isinstance(right, np.ndarray): + right = right.tolist() + + if (isinstance(right, list) and not isinstance(left, list)) or ( + isinstance(left, list) and not isinstance(right, list)): + return False + + try: + return left == right + except: + try: + if len(left) == len(right): + for left_, right_ in zip(left, right): + if not list_compare(left_, right_): + return False + return True + else: + return False + except: + return False + + +def compare_event_truth(ev, truth): + if isinstance(ev, list) and isinstance(truth, list): + ev_args = sort_ev_args(ev, truth) + if ev_args: + truth_args = truth[1] + return compare_args(ev_args, truth_args) + else: + return list_compare(ev, truth) + + +def sort_ev_args(ev, truth): + if len(ev[0]) != len(truth[0]): + return None + ev_can = ev[0] + truth_can = truth[0] + ev_args = ev[1] + ev_sorted_args = [] + for can in truth_can: + if can in ev_can: + ev_sorted_args.append(ev_args[ev_can.index(can)]) + else: + return None + + return ev_sorted_args + + +def compare_args(ev_args, truth_args): + if isinstance(ev_args, np.ndarray): + ev_args = 
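
> Editor's note: the `list_compare` docstring above records the cases that motivate its recursive fallback. The snippet below reproduces the failure of plain `==` on nested numpy arrays (a toy illustration, independent of the helper itself):

```python
import numpy as np

a = [np.array([1, 2, 3]), np.array([4, 5, 6])]
b = [np.array([1, 2, 3]), np.array([4, 5, 6])]

# list equality calls bool() on each element-wise comparison, and the
# truth value of a multi-element boolean array is ambiguous
try:
    a == b
except ValueError as err:
    print(err)  # "The truth value of an array with more than one element ..."

# list_compare instead converts ndarrays to lists and recurses, so it can
# return a single True/False for structures like a and b
```
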
ev_args.tolist() + + if isinstance(truth_args, np.ndarray): + truth_args = truth_args.tolist() + + if isinstance(ev_args, list) and isinstance(truth_args, list): + if len(ev_args) != len(truth_args): + return False + for ev_arg, truth_arg in zip(ev_args, truth_args): + if not compare_event_truth(ev_arg, truth_arg): + return False + return True + else: + return False + + +def write_annotation_file( + ann_file, entities=None, triggers=None, relations=None, events=None +): + lines = [] + + def annotate_text_bound(entities): + for entity in entities.values(): + entity_annotation = "{}\t{} {} {}\t{}".format( + entity["id"], + entity["type"], + entity["start"], + entity["end"], + entity["ref"], + ) + lines.append(entity_annotation) + + if entities: + annotate_text_bound(entities) + + if triggers: + annotate_text_bound(triggers) + + if relations: + for relation in relations.values(): + relation_annotation = "{}\t{} {}:{} {}:{}".format( + relation["id"], + relation["role"], + relation["left_arg"]["label"], + relation["left_arg"]["id"], + relation["right_arg"]["label"], + relation["right_arg"]["id"], + ) + lines.append(relation_annotation) + + if events: + for event in events.values(): + event_annotation = "{}\t{}:{}".format( + event["id"], event["trigger_type"], event["trigger_id"] + ) + for arg in event["args"]: + event_annotation += " {}:{}".format(arg["role"], arg["id"]) + lines.append(event_annotation) + + write_lines(lines, ann_file) From b56641205ef5e7cd2379b840385e6ca208f55584 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:38:07 +0900 Subject: [PATCH 16/70] readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 84fc031..64922c8 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,7 @@ sh run/train/generate_configs-debug.sh cg debug 4. Training - Pretrain layers (these need to be done before training the joint model) +- Replace "basic" by "debug" to quickly try experiments on the small data (debug mode) ```bash sh run.sh cg basic train ner sh run.sh cg basic train rel From b2734fb02f85d9f18d82a733124b903664738d82 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:52:06 +0900 Subject: [PATCH 17/70] training scripts --- README.md | 10 +++++----- run/train/train.sh | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 run/train/train.sh diff --git a/README.md b/README.md index 64922c8..71eaf9a 100644 --- a/README.md +++ b/README.md @@ -97,20 +97,20 @@ sh run/train/generate_configs-debug.sh cg debug - Pretrain layers (these need to be done before training the joint model) - Replace "basic" by "debug" to quickly try experiments on the small data (debug mode) ```bash -sh run.sh cg basic train ner -sh run.sh cg basic train rel -sh run.sh cg basic train ev +sh run/train/train.sh experiments/cg/basic/configs/train-ner.yaml +sh run/train/train.sh experiments/cg/basic/configs/train-rel.yaml +sh run/train/train.sh experiments/cg/basic/configs/train-ev.yaml ``` - Train joint model: given gold entity ```bash -sh run.sh cg basic train joint gold +sh run/train/train.sh experiments/cg/basic/configs/train-joint-gold.yaml ``` - Train joint end-to-end model ```bash -sh run.sh cg basic train joint e2e +sh run/train/train.sh experiments/cg/basic/configs/train-joint-e2e.yaml ``` # 3. 
Predict (BioNLP tasks) diff --git a/run/train/train.sh b/run/train/train.sh new file mode 100644 index 0000000..91193d6 --- /dev/null +++ b/run/train/train.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +ROOT=$PWD + +export PYTHONPATH="${PYTHONPATH}:$ROOT" +export PYTHONPATH="${PYTHONPATH}:$ROOT/model" + +CONFIG_PATH=$1 + +CONFIG_NAME=$(basename "$CONFIG_PATH") +CONFIG_NAME=(${CONFIG_NAME//./ }) + +CONFIG_DIR=$(dirname "$YAML_PATH") +EXP_DIR=$(dirname "$CONFIG_DIR") +LOG_DIR=$EXP_DIR/logs +mkdir -p $LOG_DIR + +nohup python -u main.py --yaml $YAML_PATH >> $LOG_DIR/$CONFIG_NAME.log & + +echo "The training log can be viewed at: $LOG_DIR/$CONFIG_NAME.log" +echo "Please wait several seconds to see it here." + +sleep 5 +tail -f $LOG_DIR/$CONFIG_NAME.log \ No newline at end of file From 7458407924862467a75f6128a39026ee7a4ec036 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 16:57:54 +0900 Subject: [PATCH 18/70] fix bug --- run/train/train.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run/train/train.sh b/run/train/train.sh index 91193d6..028bd10 100644 --- a/run/train/train.sh +++ b/run/train/train.sh @@ -10,12 +10,12 @@ CONFIG_PATH=$1 CONFIG_NAME=$(basename "$CONFIG_PATH") CONFIG_NAME=(${CONFIG_NAME//./ }) -CONFIG_DIR=$(dirname "$YAML_PATH") +CONFIG_DIR=$(dirname "$CONFIG_PATH") EXP_DIR=$(dirname "$CONFIG_DIR") LOG_DIR=$EXP_DIR/logs mkdir -p $LOG_DIR -nohup python -u main.py --yaml $YAML_PATH >> $LOG_DIR/$CONFIG_NAME.log & +nohup python -u train.py --yaml $CONFIG_PATH >> $LOG_DIR/$CONFIG_NAME.log & echo "The training log can be viewed at: $LOG_DIR/$CONFIG_NAME.log" echo "Please wait several seconds to see it here." From fb2a89821947c4f105ee217cc5042c7c45f4429d Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 17:14:26 +0900 Subject: [PATCH 19/70] script name --- model/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/training.py b/model/training.py index 6414bf9..5046db0 100644 --- a/model/training.py +++ b/model/training.py @@ -4,7 +4,7 @@ import os import pickle -from eval.evaluation import eval +from eval.evaluate import eval from utils import utils from utils.utils import debug, path from utils.utils import ( From c12486d1549eb7e3c026d8d9158cd8cde88ea218 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 17:15:22 +0900 Subject: [PATCH 20/70] missing script --- scripts/pipeline_process.py | 185 ++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 scripts/pipeline_process.py diff --git a/scripts/pipeline_process.py b/scripts/pipeline_process.py new file mode 100644 index 0000000..dd62541 --- /dev/null +++ b/scripts/pipeline_process.py @@ -0,0 +1,185 @@ +from collections import defaultdict + +from utils.utils import write_annotation_file + + +def get_entity_attrs(e_span_indice, words, offsets, sub_to_words): + e_words = [] + e_offset = [-1, -1] + curr_word_idx = -1 + for idx in range(e_span_indice[0], e_span_indice[1] + 1): + if sub_to_words[idx] != curr_word_idx: + e_words.append(words[sub_to_words[idx]]) + curr_word_idx = sub_to_words[idx] + if idx == e_span_indice[0]: + e_offset[0] = offsets[sub_to_words[idx]][0] + if idx == e_span_indice[1]: + e_offset[1] = offsets[sub_to_words[idx]][1] + return ' '.join(e_words), (e_offset[0], e_offset[1]) + + +def gen_sw_offsets(word_offsets, words, subwords, sub_to_words): + sw_offsets = [] + last_sw_offsets = -1 + for sw_id, w_id in sub_to_words.items(): + subword = subwords[sw_id].replace('##', '') + word = words[w_id] + 
word_offset = word_offsets[w_id] + sw_idx = word.index(subword, + 0 if (last_sw_offsets == -1 or last_sw_offsets < word_offset[0]) else last_sw_offsets - 1 - + word_offset[0]) + sw_offsets.append((word_offset[0] + sw_idx, word_offset[0] + sw_idx + len(subword))) + last_sw_offsets = word_offset[0] + sw_idx + len(subword) + return sw_offsets + + +def get_entity_sw_attrs(e_id, e_span_indice, words, offsets, sub_to_words, subwords, sw_offsets, org_mapping): + e_words = [] + e_offset = [-1, -1] + sw_text = [] + sw_offset = [-1, -1] + + curr_word_idx = -1 + for idx in range(e_span_indice[0], e_span_indice[1] + 1): + if sub_to_words[idx] != curr_word_idx: + e_words.append(words[sub_to_words[idx]]) + curr_word_idx = sub_to_words[idx] + sw_text.append(subwords[idx]) + if idx == e_span_indice[0]: + e_offset[0] = offsets[sub_to_words[idx]][0] + sw_offset[0] = sw_offsets[idx][0] + if idx == e_span_indice[1]: + e_offset[1] = offsets[sub_to_words[idx]][1] + sw_offset[1] = sw_offsets[idx][1] + org_mapping[e_id] = (' '.join(e_words), (e_offset[0], e_offset[1])) + return ' '.join(sw_text), (sw_offset[0], sw_offset[1]) + + +def gen_ner_ann_files(fidss, ent_anns, params): + dir2wr = params['pipeline_setting'] + params['pipe_ner'] + + # Initial ent map + map = defaultdict() + org_map = defaultdict() + params['pipeline_text_data'] = defaultdict() + for fids in fidss: + for fid in fids: + map[fid] = {} + org_map[fid] = {} + params['pipeline_text_data'][fid] = [] + + for xi, (fids, ent_ann) in enumerate(zip(fidss, ent_anns)): + # Mapping entities + entity_map = defaultdict() + for xb, (fid) in enumerate(fids): + span_indices = ent_ann['span_indices'][xb] + ner_terms = ent_ann['ner_terms'][xb] + ner_preds = ent_ann['ner_preds'][xb] + words = ent_ann['words'][xb] + offsets = ent_ann['offsets'][xb] + sub_to_words = ent_ann['sub_to_words'][xb] + subwords = ent_ann['subwords'][xb] + sw_offsets = gen_sw_offsets(offsets, words, subwords, sub_to_words) + params['pipeline_text_data'][fid].append( + {'words': subwords, 'offsets': sw_offsets}) + entities = map[fid] + org_mapping = org_map[fid] + + for x, pair in enumerate(span_indices): + if pair[0].item() == -1: + break + if ner_preds[x] > 0: + try: + e_id = ner_terms.id2term[x] + e_type = params['mappings']['rev_type_map'][ + params['mappings']['nn_mapping']['tag2type_map'][ner_preds[x]]] + e_words, e_offset = get_entity_sw_attrs(e_id, pair, words, offsets, sub_to_words, subwords, + sw_offsets, org_mapping) + entity_map[(xb, (pair[0].item(), pair[1].item()))] = ( + ner_preds[x], e_id, e_type, e_words, e_offset) + entities[e_id] = {"id": e_id, "type": e_type, "start": e_offset[0], "end": e_offset[1], + "ref": e_words} + except KeyError as error: + print('pred not map term', error) + + params['pipeline_entity_org_map'] = org_map + + for fid, ners in map.items(): + write_annotation_file(ann_file=dir2wr + fid + '.ann', entities=ners) + + +def gen_rel_ann_files(fidss, ent_anns, rel_anns, params): + dir2wr = params['pipeline_setting'] + params['pipe_rel'] + + # Initial ent+rel map + map = defaultdict() + for fids in fidss: + for fid in fids: + map[fid] = {'ents': {}, 'rels': {}} + + for xi, (fids, ent_ann, rel_ann) in enumerate(zip(fidss, ent_anns, rel_anns)): + # Mapping entities + entity_map = defaultdict() + for xb, (fid) in enumerate(fids): + span_indices = ent_ann['span_indices'][xb] + ner_terms = ent_ann['ner_terms'][xb] + ner_preds = ent_ann['ner_preds'][xb] + words = ent_ann['words'][xb] + offsets = ent_ann['offsets'][xb] + sub_to_words = ent_ann['sub_to_words'][xb] + 
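
> Editor's note: `gen_sw_offsets` above projects WordPiece pieces back onto character offsets in the source text. A simplified, single-word walk-through of the idea (toy inputs; the real helper additionally uses `last_sw_offsets` to disambiguate repeated substrings within a word):

```python
# "phosphorylation" occupies characters 10..25 of the document and is
# tokenized into three WordPiece pieces; each '##' piece continues the
# same word, so its span is the next slice of the word's span.
word_offset = (10, 25)
subwords = ["phos", "##phor", "##ylation"]

start = word_offset[0]
spans = []
for sw in subwords:
    piece = sw.replace("##", "")
    spans.append((start, start + len(piece)))
    start += len(piece)

assert spans == [(10, 14), (14, 18), (18, 25)]
```

These character spans are what let the pipeline write brat-style `.ann` offsets that point back into the original text rather than into the subword sequence.
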
+ entities = map[fid]['ents'] + + for x, pair in enumerate(span_indices): + if pair[0].item() == -1: + break + if ner_preds[x] > 0: + try: + e_id = ner_terms.id2term[x] + e_type = params['mappings']['rev_type_map'][ + params['mappings']['nn_mapping']['tag2type_map'][ner_preds[x]]] + e_words, e_offset = get_entity_attrs(pair, words, offsets, sub_to_words) + entity_map[(xb, (pair[0].item(), pair[1].item()))] = ( + ner_preds[x], e_id, e_type, e_words, e_offset) + entities[e_id] = {"id": e_id, "type": e_type, "start": e_offset[0], "end": e_offset[1], + "ref": e_words} + except KeyError as error: + print('pred not map term', error) + if len(rel_ann) > 0: + # Mapping relations + pairs_idx = rel_ann['pairs_idx'] + rel_preds = rel_ann['rel_preds'] + + pairs_idx_i = pairs_idx[0] + pairs_idx_j = pairs_idx[1] + pairs_idx_k = pairs_idx[2] + + for x, i in enumerate(pairs_idx_i): + relations = map[fids[i]]['rels'] + r_count = len(relations) + 1 + + j = pairs_idx_j[x] + k = pairs_idx_k[x] + rel = rel_preds[x].item() + role = params['mappings']['rev_rel_map'][rel].split(":")[1] + if role != 'Other': + arg1s = entity_map[ + (i.item(), (ent_ann['span_indices'][i][j][0].item(), ent_ann['span_indices'][i][j][1].item()))] + arg2s = entity_map[ + (i.item(), (ent_ann['span_indices'][i][k][0].item(), ent_ann['span_indices'][i][k][1].item()))] + + if int(params['mappings']['rev_rel_map'][rel].split(":")[0]) > int( + params['mappings']['rev_rel_map'][rel].split(":")[-1]): + arg1 = arg2s[1] + arg2 = arg1s[1] + else: + arg1 = arg1s[1] + arg2 = arg2s[1] + r_id = 'R' + str(r_count) + r_count += 1 + relations[r_id] = {"id": r_id, "role": role, + "left_arg": {"label": "Arg1", "id": arg1}, + "right_arg": {"label": "Arg2", "id": arg2}} + + for fid, ners_rels in map.items(): + write_annotation_file(ann_file=dir2wr + fid + '.ann', entities=ners_rels['ents'], relations=ners_rels['rels']) From 59808e3ab171c37b8947edb4a55d8af26e28408e Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 17:23:17 +0900 Subject: [PATCH 21/70] epochs for debug mode --- scripts/generate_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_configs.py b/scripts/generate_configs.py index 130b1b5..8698cd8 100644 --- a/scripts/generate_configs.py +++ b/scripts/generate_configs.py @@ -212,7 +212,7 @@ def set_debug_mode(configs, args): configs['test_data'] = configs['test_data'].replace('dev', "debug") configs['test_data'] = configs['test_data'].replace('test', "debug") if "epoch" in configs: - configs["epoch"] = 2 + configs["epoch"] = 10 def generate_configs(args, expdir, task, exp_name): """Generate configs for all.""" From 025690f5050b3ada10fbb83be98fff02e61bd446 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 23:35:34 +0900 Subject: [PATCH 22/70] scripts path --- README.md | 18 +++++++++--------- run/{train => }/download-bert.sh | 0 run/{train => }/generate-configs-debug.sh | 0 run/{train => }/generate-configs.sh | 0 run/{train => }/prepare-cg.sh | 0 run/{train => }/train.sh | 0 6 files changed, 9 insertions(+), 9 deletions(-) rename run/{train => }/download-bert.sh (100%) rename run/{train => }/generate-configs-debug.sh (100%) rename run/{train => }/generate-configs.sh (100%) rename run/{train => }/prepare-cg.sh (100%) rename run/{train => }/train.sh (100%) diff --git a/README.md b/README.md index 71eaf9a..4e01a83 100644 --- a/README.md +++ b/README.md @@ -73,44 +73,44 @@ python2 standalone.py - Download the processed event structures ```bash -sh run/train/prepare-cg.sh +sh 
run/prepare-cg.sh ``` 2. Download models - Download SciBERT model from PyTorch AllenNLP ```bash -sh run/train/download-bert.sh +sh run/download-bert.sh ``` 3. Generate configs - Configs for training CG task ```bash -sh run/train/generate_configs.sh cg basic +sh run/generate_configs.sh cg basic ``` - Experiment name: basic, exp1, exp2, etc - Or running this debug mode (on a small data with several epochs) ```bash -sh run/train/generate_configs-debug.sh cg debug +sh run/generate_configs-debug.sh cg debug ``` 4. Training - Pretrain layers (these need to be done before training the joint model) - Replace "basic" by "debug" to quickly try experiments on the small data (debug mode) ```bash -sh run/train/train.sh experiments/cg/basic/configs/train-ner.yaml -sh run/train/train.sh experiments/cg/basic/configs/train-rel.yaml -sh run/train/train.sh experiments/cg/basic/configs/train-ev.yaml +sh run/train.sh experiments/cg/basic/configs/train-ner.yaml +sh run/train.sh experiments/cg/basic/configs/train-rel.yaml +sh run/train.sh experiments/cg/basic/configs/train-ev.yaml ``` - Train joint model: given gold entity ```bash -sh run/train/train.sh experiments/cg/basic/configs/train-joint-gold.yaml +sh run/train.sh experiments/cg/basic/configs/train-joint-gold.yaml ``` - Train joint end-to-end model ```bash -sh run/train/train.sh experiments/cg/basic/configs/train-joint-e2e.yaml +sh run/train.sh experiments/cg/basic/configs/train-joint-e2e.yaml ``` # 3. Predict (BioNLP tasks) diff --git a/run/train/download-bert.sh b/run/download-bert.sh similarity index 100% rename from run/train/download-bert.sh rename to run/download-bert.sh diff --git a/run/train/generate-configs-debug.sh b/run/generate-configs-debug.sh similarity index 100% rename from run/train/generate-configs-debug.sh rename to run/generate-configs-debug.sh diff --git a/run/train/generate-configs.sh b/run/generate-configs.sh similarity index 100% rename from run/train/generate-configs.sh rename to run/generate-configs.sh diff --git a/run/train/prepare-cg.sh b/run/prepare-cg.sh similarity index 100% rename from run/train/prepare-cg.sh rename to run/prepare-cg.sh diff --git a/run/train/train.sh b/run/train.sh similarity index 100% rename from run/train/train.sh rename to run/train.sh From d2197b0c1417432cdf3ea93eeba31a637f7dda93 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 23:39:35 +0900 Subject: [PATCH 23/70] predict --- README.md | 13 +++++++++++++ predict.py | 2 +- run/predict.sh | 24 ++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 run/predict.sh diff --git a/README.md b/README.md index 4e01a83..011f2e9 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,19 @@ sh run/train.sh experiments/cg/basic/configs/train-joint-gold.yaml sh run/train.sh experiments/cg/basic/configs/train-joint-e2e.yaml ``` +5. Predict +- Given gold entity +```bash +sh run/predict.sh experiments/cg/basic/configs/predict-gold-dev.yaml +sh run/predict.sh experiments/cg/basic/configs/predict-gold-test.yaml +``` + +- End-to-end +```bash +sh run/predict.sh experiments/cg/basic/configs/predict-e2e-dev.yaml +sh run/predict.sh experiments/cg/basic/configs/predict-e2e-test.yaml +``` + # 3. Predict (BioNLP tasks) ## 3.1. 
Prepare data diff --git a/predict.py b/predict.py index fc67343..63aad3e 100644 --- a/predict.py +++ b/predict.py @@ -7,7 +7,7 @@ from eval.evaluate import predict -from nets import deepEM +from model import deepEM from loader.prepData import prepdata from loader.prepNN import prep4nn from utils import utils diff --git a/run/predict.sh b/run/predict.sh new file mode 100644 index 0000000..dfe7e22 --- /dev/null +++ b/run/predict.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +ROOT=$PWD + +export PYTHONPATH="${PYTHONPATH}:$ROOT" +export PYTHONPATH="${PYTHONPATH}:$ROOT/model" + +CONFIG_PATH=$1 + +CONFIG_NAME=$(basename "$CONFIG_PATH") +CONFIG_NAME=(${CONFIG_NAME//./ }) + +CONFIG_DIR=$(dirname "$CONFIG_PATH") +EXP_DIR=$(dirname "$CONFIG_DIR") +LOG_DIR=$EXP_DIR/logs +mkdir -p $LOG_DIR + +nohup python -u predict.py --yaml $CONFIG_PATH >> $LOG_DIR/$CONFIG_NAME.log & + +echo "The training log can be viewed at: $LOG_DIR/$CONFIG_NAME.log" +echo "Please wait several seconds to see it here." + +sleep 5 +tail -f $LOG_DIR/$CONFIG_NAME.log \ No newline at end of file From cc7eaa47af5822e298019a875faf84eb354aee88 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 23:44:09 +0900 Subject: [PATCH 24/70] prediction script --- predict.py | 220 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 178 insertions(+), 42 deletions(-) diff --git a/predict.py b/predict.py index 63aad3e..d237630 100644 --- a/predict.py +++ b/predict.py @@ -1,12 +1,13 @@ import os import random import pickle +import time + import numpy as np import torch from torch.utils.data import TensorDataset, DataLoader, SequentialSampler -from eval.evaluate import predict - +from eval.evaluate import eval from model import deepEM from loader.prepData import prepdata from loader.prepNN import prep4nn @@ -14,94 +15,220 @@ def main(): - # read predict config + # check running time + t_start = time.time() + # set config path by command line inp_args = utils._parsing() config_path = getattr(inp_args, 'yaml') # set config path manually - # config_path = 'configs/debug.yaml' + # config_path = 'configs/default.yaml' with open(config_path, 'r') as stream: - pred_params = utils._ordered_load(stream) + parameters = utils._ordered_load(stream) + + # print config + utils._print_config(parameters, config_path) + + pred_params = parameters # Fix seed for reproducibility - os.environ["PYTHONHASHSEED"] = str(pred_params['seed']) - random.seed(pred_params['seed']) - np.random.seed(pred_params['seed']) - torch.manual_seed(pred_params['seed']) + os.environ["PYTHONHASHSEED"] = str(parameters['seed']) + random.seed(parameters['seed']) + np.random.seed(parameters['seed']) + torch.manual_seed(parameters['seed']) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False + # Load configurations for prediction only + test_data_dir = parameters['test_data'] + params_dir = parameters['params'] + pipelines = parameters['pipelines'] + t_gpu = parameters['t_gpu'] + t_fp16 = parameters['t_fp16'] + t_batch_size = parameters['t_batch_size'] + + rel_eval_script_path = parameters['rel_eval_script_path'] + + ev_eval_script_path = parameters['ev_eval_script_path'] + + gold_eval = parameters['gold_eval'] + + bert_model = parameters['bert_model'] + # Load pre-trained parameters - with open(pred_params['saved_params'], "rb") as f: + with open(params_dir, "rb") as f: parameters = pickle.load(f) parameters['predict'] = True - # Set predict settings value for params - parameters['gpu'] = pred_params['gpu'] - parameters['batchsize'] = 
pred_params['batchsize'] + parameters['gpu'] = t_gpu + parameters['fp16'] = t_fp16 + parameters['batchsize'] = t_batch_size if parameters['gpu'] >= 0: device = torch.device("cuda:" + str(parameters['gpu']) if torch.cuda.is_available() else "cpu") + torch.cuda.set_device(parameters['gpu']) else: device = torch.device("cpu") parameters['device'] = device # Set evaluation settings - parameters['test_data'] = pred_params['test_data'] + parameters['test_data'] = test_data_dir + parameters['rel_eval_script_path'] = rel_eval_script_path + parameters['ev_eval_script_path'] = ev_eval_script_path - parameters['bert_model'] = pred_params['bert_model'] + parameters['gold_eval'] = gold_eval + parameters['bert_model'] = bert_model + parameters['pipelines'] = pipelines result_dir = pred_params['result_dir'] if not os.path.exists(result_dir): os.makedirs(result_dir) + parameters['pipeline_setting'] = result_dir + parameters['pipe_ner'] = 'pipe_ner/' + parameters['pipe_rel'] = 'pipe_rel/' + parameters['pipe_ev'] = 'pipe_ev/' + pipe_ner = parameters['pipeline_setting'] + parameters['pipe_ner'] + pipe_rel = parameters['pipeline_setting'] + parameters['pipe_rel'] + preprocess_pipe_dir(parameters['test_data'], pipe_ner) + preprocess_pipe_dir(parameters['test_data'], pipe_rel) parameters['result_dir'] = pred_params['result_dir'] - # raw text - parameters['raw_text'] = pred_params['raw_text'] - parameters['ner_predict_all'] = pred_params['raw_text'] - parameters['a2_entities'] = pred_params['a2_entities'] - - # process data - test_data = prepdata.prep_input_data(pred_params['test_data'], parameters) - nntest_data, test_dataloader = read_test_data(test_data, parameters) + parameters['ner_predict_all'] = pred_params['ner_predict_all'] - # model - deepee_model = deepEM.DeepEM(parameters) + if pipelines: - model_path = pred_params['model_path'] + ner_model_dir = pred_params['ner_model_dir'] + rel_model_dir = pred_params['rel_model_dir'] + ev_model_dir = pred_params['ev_model_dir'] - # Load all models - utils.handle_checkpoints(model=deepee_model, - checkpoint_dir=model_path, - params={ - 'device': device - }, - resume=True) + model_dir = {'NER': ner_model_dir, 'REL': rel_model_dir, 'EV': ev_model_dir} + data_dir = {'NER': test_data_dir, 'REL': pipe_ner, 'EV': pipe_rel} + # 1.Run NER model + print("Start running NER model") + run_pipeline('NER', model_dir, data_dir, parameters, device) + # 2.Run REL model + print("Start running REL model") + run_pipeline('REL', model_dir, data_dir, parameters, device) + # 3. Run EV model (final) + print("Start running EV model") + run_pipeline('EV', model_dir, data_dir, parameters, device) + else: + # 1. process data + test_data = prepdata.prep_input_data(test_data_dir, parameters) + test_data, test_dataloader = read_test_data(test_data, parameters) + + # 2. 
model + # Init zero model + deepee_model = deepEM.DeepEM(parameters) + + deepee_model_dir = pred_params['joint_model_dir'] + + # Load all models + utils.handle_checkpoints(model=deepee_model, + checkpoint_dir=deepee_model_dir, + params={ + 'device': device + }, + resume=True) + + deepee_model.to(device) + + # create output directory for results + result_dir = parameters['result_dir'] + if not os.path.exists(result_dir): + os.makedirs(result_dir) + + eval(model=deepee_model, + eval_dir=parameters['test_data'], + result_dir=result_dir, + eval_dataloader=test_dataloader, + eval_data=test_data, + params=parameters) + + print('PREDICT: DONE!') + + # calculate running time + t_end = time.time() + print('TOTAL RUNNING TIME: {}'.format(utils._humanized_time(t_end - t_start))) + + +def run_pipeline(model_type, model_dir, test_data_dir, params, device): + if model_type == 'NER': + deepee_model = deepEM.DeepEM(params) + utils.handle_checkpoints(model=deepee_model.NER_layer, + checkpoint_dir=model_dir[model_type] + 'ner_model/', + params={ + 'device': device + }, + resume=True) + params['pipe_flag'] = 0 + elif model_type == 'REL': + deepee_model = deepEM.DeepEM(params) + utils.handle_checkpoints(model=deepee_model.REL_layer, + checkpoint_dir=model_dir[model_type] + 'rel_model/', + params={ + 'device': device + }, + resume=True) + utils.handle_checkpoints(model=deepee_model.NER_layer, + checkpoint_dir=model_dir[model_type] + 'ner_model/', + params={ + 'device': device + }, + resume=True) + params['pipe_flag'] = 1 + elif model_type == 'EV': + deepee_model = deepEM.DeepEM(params) + utils.handle_checkpoints(model=deepee_model.EV_layer, + checkpoint_dir=model_dir[model_type] + 'ev_model/', + params={ + 'device': device + }, + resume=True) + utils.handle_checkpoints(model=deepee_model.REL_layer, + checkpoint_dir=model_dir[model_type] + 'rel_model/', + params={ + 'device': device + }, + resume=True) + utils.handle_checkpoints(model=deepee_model.NER_layer, + checkpoint_dir=model_dir[model_type] + 'ner_model/', + params={ + 'device': device + }, + resume=True) + params['pipe_flag'] = 2 + test_data = prepdata.prep_input_data(test_data_dir[model_type], params) + test_data, test_dataloader = read_test_data(test_data, params) deepee_model.to(device) - predict(model=deepee_model, - result_dir=result_dir, - eval_dataloader=test_dataloader, - eval_data=nntest_data, - g_entity_ids_=test_data['g_entity_ids_'], - params=parameters) + # create output directory for results + result_dir = 'results/' + params['experiment_name'] + '/' + if not os.path.exists(result_dir): + os.makedirs(result_dir) - # print('Done!') + eval(model=deepee_model, + eval_dir=test_data_dir['NER'], + result_dir=result_dir, + eval_dataloader=test_dataloader, + eval_data=test_data, + params=params) def read_test_data(test_data, params): - test = prep4nn.data2network(test_data, 'predict', params) + test, test_events_map = prep4nn.data2network(test_data, 'predict', params) if len(test) == 0: raise ValueError("Test set empty.") - test_data = prep4nn.torch_data_2_network(cdata2network=test, params=params, do_get_nn_data=True) + test_data = prep4nn.torch_data_2_network(cdata2network=test, events_map=test_events_map, params=params, + do_get_nn_data=True) te_data_size = len(test_data['nn_data']['ids']) test_data_ids = TensorDataset(torch.arange(te_data_size)) @@ -110,5 +237,14 @@ def read_test_data(test_data, params): return test_data, test_dataloader +def preprocess_pipe_dir(test_dir, pipe_dir): + if not os.path.exists(pipe_dir): + os.makedirs(pipe_dir) + 
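
> Editor's note: `run_pipeline` above restores checkpoints cumulatively: the REL stage reloads NER as well, and the EV stage reloads all three layers. A minimal sketch of restoring one sub-module at a time, which is the pattern `utils.handle_checkpoints` wraps here (illustrative paths and module names; the real helper also handles device mapping and resume bookkeeping):

```python
import torch
from torch import nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.NER_layer = nn.Linear(4, 4)
        self.REL_layer = nn.Linear(4, 4)

trained = Toy()
torch.save(trained.NER_layer.state_dict(), "/tmp/ner.pt")
torch.save(trained.REL_layer.state_dict(), "/tmp/rel.pt")

# a later stage rebuilds the model and restores layer by layer,
# loading every earlier stage's weights before its own
fresh = Toy()
fresh.NER_layer.load_state_dict(torch.load("/tmp/ner.pt"))
fresh.REL_layer.load_state_dict(torch.load("/tmp/rel.pt"))
```
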
else: + os.system('rm ' + pipe_dir + '*') + command = 'cp ' + test_dir + '*.txt ' + pipe_dir + os.system(command) + + if __name__ == '__main__': main() From ff8f6db0349c75ba6e1fed6f5d889613aa0bd4c9 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 23:56:51 +0900 Subject: [PATCH 25/70] readme --- README.md | 84 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 011f2e9..0bc014c 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,19 @@ # DeepEventMine + A deep leanring model to predict named entities, triggers, and nested events from biomedical texts. - The model and results are reported in our paper: -[DeepEventMine: End-to-end Neural Nested Event Extraction from Biomedical Texts](https://doi.org/10.1093/bioinformatics/btaa540), Bioinformatics, 2020. +[DeepEventMine: End-to-end Neural Nested Event Extraction from Biomedical Texts](https://doi.org/10.1093/bioinformatics/btaa540) +, Bioinformatics, 2020. ## Overview + 1. Features -- Based on [pre-trained BERT](https://github.com/allenai/scibert) -- Predict nested entities and nested events -- Provide our trained models on the seven biomedical tasks + +- End-to-end event extraction, fine-tuned on [pre-trained BERT](https://github.com/allenai/scibert) +- Train and predict nested entities and nested events +- Provide our pre-trained models on seven biomedical tasks - Reproduce the results reported in our [Bioinformatics](https://doi.org/10.1093/bioinformatics/btaa540) paper - Predict for new data given raw text input or PubMed ID - Visualize the predicted entities and events on the [brat](http://brat.nlplab.org) @@ -22,11 +26,13 @@ A deep leanring model to predict named entities, triggers, and nested events fro 2. ge11: [GENIA Event Extraction (GENIA), 2011](http://2011.bionlp-st.org/home/genia-event-extraction-genia) 3. ge13: [GENIA Event Extraction (GENIA), 2013](http://bionlp.dbcls.jp/projects/bionlp-st-ge-2013/wiki/Overview) 4. id: [Infectious Diseases (ID), 2011](http://2011.bionlp-st.org/home/infectious-diseases) -5. epi: [Epigenetics and Post-translational Modifications (EPI), 2011](http://2011.bionlp-st.org/home/epigenetics-and-post-translational-modifications) +5. +epi: [Epigenetics and Post-translational Modifications (EPI), 2011](http://2011.bionlp-st.org/home/epigenetics-and-post-translational-modifications) 6. pc: [Pathway Curation (PC), 2013](http://2013.bionlp-st.org/tasks/pathway-curation) 7. mlee: [Multi-Level Event Extraction (MLEE)](http://nactem.ac.uk/MLEE/) # 1. Preparation + 1. Install conda environment ```bash @@ -48,6 +54,7 @@ sh setup/conda-install.sh ``` 4. Install requirements + - Python 3.6.5 - PyTorch (torch==1.1.0 torchvision==0.3.0, cuda92) - Python dependencies @@ -57,6 +64,7 @@ pip install -r requirements.txt ``` 5. [Brat](https://github.com/nlplab/brat) for visualization + - brat instructions](http://brat.nlplab.org/installation.html) ```bash @@ -65,37 +73,50 @@ python2 standalone.py ``` # 2. Training CG + 1. Download data and process + - Download data - Process data to appropriate format - Tokenize texts and retrieve offsets - Data statistics - Download the processed event structures +- The original BioNLP 2013 (for downloading CG data) seems unavailable recently. We found an alternative link + for [CG13 task](https://sites.google.com/site/bionlpst2013/tasks/cancer-genetics-cg-task). 
You may download the data + by yourself (we are not sure the data is the same as the original link, so please check by yourself). ```bash sh run/prepare-cg.sh ``` 2. Download models + - Download SciBERT model from PyTorch AllenNLP + ```bash sh run/download-bert.sh ``` 3. Generate configs + - Configs for training CG task + ```bash sh run/generate_configs.sh cg basic ``` + - Experiment name: basic, exp1, exp2, etc - Or running this debug mode (on a small data with several epochs) + ```bash sh run/generate_configs-debug.sh cg debug ``` 4. Training + - Pretrain layers (these need to be done before training the joint model) - Replace "basic" by "debug" to quickly try experiments on the small data (debug mode) + ```bash sh run/train.sh experiments/cg/basic/configs/train-ner.yaml sh run/train.sh experiments/cg/basic/configs/train-rel.yaml @@ -103,6 +124,7 @@ sh run/train.sh experiments/cg/basic/configs/train-ev.yaml ``` - Train joint model: given gold entity + ```bash sh run/train.sh experiments/cg/basic/configs/train-joint-gold.yaml ``` @@ -114,13 +136,16 @@ sh run/train.sh experiments/cg/basic/configs/train-joint-e2e.yaml ``` 5. Predict + - Given gold entity + ```bash sh run/predict.sh experiments/cg/basic/configs/predict-gold-dev.yaml sh run/predict.sh experiments/cg/basic/configs/predict-gold-test.yaml ``` - End-to-end + ```bash sh run/predict.sh experiments/cg/basic/configs/predict-e2e-dev.yaml sh run/predict.sh experiments/cg/basic/configs/predict-e2e-test.yaml @@ -129,7 +154,9 @@ sh run/predict.sh experiments/cg/basic/configs/predict-e2e-test.yaml # 3. Predict (BioNLP tasks) ## 3.1. Prepare data + 1. Download corpora + - To download the original data sets from BioNLP shared tasks. - [task] = cg, pc, ge11, etc @@ -138,6 +165,7 @@ sh download.sh bionlp [task] ``` 2. Download our pre-trained DeepEventMine model on a given task + - [Our trained models](https://b2share.eudat.eu/records/80d2de0c57d64419b722dc1afa375f28) - [Our scores](https://b2share.eudat.eu/api/files/3cf6c1f4-5eed-4ee3-99c5-d99f5f011be3/scores.tar.gz) - [task] = cg (or pc, ge11, epi, etc) @@ -147,14 +175,18 @@ sh download.sh deepeventmine [task] ``` 3. Preprocess data + - Tokenize texts and prepare data for prediction + ```bash sh preprocess.sh bionlp ``` 4. Generate configs + - If using GPU: [gpu] = 0, otherwise: [gpu] = -1 - [task] = cg, pc, etc + ```bash sh run.sh config [task] [gpu] ``` @@ -162,6 +194,7 @@ sh run.sh config [task] [gpu] ## 3.2. Predict 1. For development and test sets (given gold entities) + - CG task: [task] = cg - PC task: [task] = pc - Similarly for: ge11, ge13, epi, id, mlee @@ -170,7 +203,9 @@ sh run.sh config [task] [gpu] sh run.sh predict [task] gold dev sh run.sh predict [task] gold test ``` + - Check the output in the path + ```bash experiments/[task]/predict-gold-dev/ experiments/[task]/predict-gold-test/ @@ -179,6 +214,7 @@ experiments/[task]/predict-gold-test/ ## 3.3. Evaluate 1. Retrieve the original offsets and create zip format + ```bash sh run.sh offset [task] gold dev sh run.sh offset [task] gold test @@ -187,10 +223,14 @@ sh run.sh offset [task] gold test 2. 
Submit the zipped file to the shared task evaluation sites: - [CG Test](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST-2013/CG/submission/) -- [GE11 Test](http://bionlp-st.dbcls.jp/GE/2011/eval-test/), [GE11 Devel](http://bionlp-st.dbcls.jp/GE/2011/eval-development/) -- [GE13 Test](http://bionlp-st.dbcls.jp/GE/2013/eval-test/), [GE13 Devel](http://bionlp-st.dbcls.jp/GE/2013/eval-development/) -- [ID Test](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST/ID/test-eval.html), [ID Devel](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST/ID/devel-eval.htm) -- [EPI Test](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST/EPI/test-eval.html), [EPI Devel](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST/EPI/devel-eval.htm) +- [GE11 Test](http://bionlp-st.dbcls.jp/GE/2011/eval-test/) + , [GE11 Devel](http://bionlp-st.dbcls.jp/GE/2011/eval-development/) +- [GE13 Test](http://bionlp-st.dbcls.jp/GE/2013/eval-test/) + , [GE13 Devel](http://bionlp-st.dbcls.jp/GE/2013/eval-development/) +- [ID Test](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST/ID/test-eval.html) + , [ID Devel](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST/ID/devel-eval.htm) +- [EPI Test](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST/EPI/test-eval.html) + , [EPI Devel](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST/EPI/devel-eval.htm) - [PC Test](http://weaver.nlplab.org/~bionlp-st/BioNLP-ST-2013/PC/submission/) 3. Evaluate events @@ -205,18 +245,23 @@ sh run.sh eval [task] gold dev sp # 4. End-to-end ## 4.1. Input: a single PMID or PMCID + - Abstract + ```bash sh pubmed.sh e2e pmid 1370299 cg 0 ``` - Full text + ```bash sh pubmed.sh e2e pmcid PMC4353630 cg 0 ``` -- Input: [PMID: 1370299](https://pubmed.ncbi.nlm.nih.gov/1370299/), [PMCID: PMC4353630](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4353630/) (a single PubMed ID to get raw text) -- Model to predict: DeepEventMine trained on [cg (Cancer Genetics 2013)](http://2013.bionlp-st.org/tasks/cancer-genetics), (other options: pc, ge11, etc) +- Input: [PMID: 1370299](https://pubmed.ncbi.nlm.nih.gov/1370299/) + , [PMCID: PMC4353630](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4353630/) (a single PubMed ID to get raw text) +- Model to predict: DeepEventMine trained + on [cg (Cancer Genetics 2013)](http://2013.bionlp-st.org/tasks/cancer-genetics), (other options: pc, ge11, etc) - GPU: 0 (if CPU: -1) - Output: in brat format and [brat visualization](http://brat.nlplab.org) @@ -250,6 +295,7 @@ E24 Positive_regulation:T61 Theme:E10 - Given an arbitrary name for your raw text data, for example "my-pubmed" - Prepare a list of PMID and PMCID in the path + ```bash data/my-pubmed/pmid.txt ``` @@ -262,6 +308,7 @@ sh pubmed.sh e2e pmids my-pubmed cg 0 - Given an arbitrary name for your raw text data, for example "my-pubmed" - Prepare your raw text files in the path + ```bash data/my-pubmed/text/PMID-*.txt data/my-pubmed/text/PMC-*.txt @@ -293,6 +340,7 @@ data/my-pubmed/text/PMC-*.txt ### Get raw text 1. PubMed ID list + - In order to get full text given PMC ID, the text should be available in ePub (for our current version). - Prepare your list of PubMed ID and PMC ID in the path @@ -301,12 +349,15 @@ data/my-pubmed/pmid.txt ``` - Get text from the PubMed ID + ```bash sh pubmed.sh pmids my-pubmed ``` 2. PubMed ID + - You can also get text by directly input a PubMed or PMC ID + ```bash sh pubmed.sh pmid 1370299 sh pubmed.sh pmcid PMC4353630 @@ -321,6 +372,7 @@ sh pubmed.sh preprocess my-pubmed ## 5.3. Predict 1. 
Generate config + - Generate config for prediction - The data name to predict: my-pubmed - The trained model used for predict: cg (or pc, ge11, etc) @@ -343,6 +395,7 @@ sh pubmed.sh offset my-pubmed ``` - Check the output in + ```bash experiments/my-pubmed/results/ev-last/my-pubmed-brat ``` @@ -353,11 +406,13 @@ experiments/my-pubmed/results/ev-last/my-pubmed-brat - Copy the predicted data into the brat folder to visualize - For the raw text prediction: + ```bash sh pubmed.sh brat my-pubmed cg ``` - Or for the shared task + ```bash sh run.sh brat [task] gold dev sh run.sh brat [task] gold test @@ -373,10 +428,13 @@ brat/brat-v1.3_Crunchy_Frog/data/[task]-brat ``` # 7. Acknowledgements -This work is based on results obtained from a project commissioned by the New Energy and Industrial Technology Development Organization (NEDO). -This work is also supported by PRISM (Public/Private R&D Investment Strategic Expansion PrograM). + +This work is based on results obtained from a project commissioned by the New Energy and Industrial Technology +Development Organization (NEDO). This work is also supported by PRISM (Public/Private R&D Investment Strategic Expansion +PrograM). # 8. Citation + ```bash @article{10.1093/bioinformatics/btaa540, author = {Trieu, Hai-Long and Tran, Thy Thy and Duong, Khoa N A and Nguyen, Anh and Miwa, Makoto and Ananiadou, Sophia}, From d18e2d725f53a4142b04eb09a2e5bf83635d77b4 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Mon, 21 Mar 2022 23:59:03 +0900 Subject: [PATCH 26/70] readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0bc014c..85296a6 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,9 @@ python2 standalone.py - Tokenize texts and retrieve offsets - Data statistics - Download the processed event structures -- The original BioNLP 2013 (for downloading CG data) seems unavailable recently. We found an alternative link +- The [original BioNLP 2013](http://2013.bionlp-st.org/tasks/cancer-genetics) (for downloading CG data) seems unavailable recently. We found an alternative link for [CG13 task](https://sites.google.com/site/bionlpst2013/tasks/cancer-genetics-cg-task). You may download the data - by yourself (we are not sure the data is the same as the original link, so please check by yourself). + by yourself. We are not sure the data is the same as the original link, so please check by yourself or contact the workshop's organizers. ```bash sh run/prepare-cg.sh From 22fac688423af9947762443fee5189f3fdc6b9ad Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:07:59 +0900 Subject: [PATCH 27/70] prediction configs --- pubmed.sh | 4 +- run.sh | 2 +- scripts/generate_pred_configs.py | 168 +++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 scripts/generate_pred_configs.py diff --git a/pubmed.sh b/pubmed.sh index ae22dbd..7a42ea9 100644 --- a/pubmed.sh +++ b/pubmed.sh @@ -52,7 +52,7 @@ elif [ "$TASK" = "config" ]; then GPU=$4 EXP_DIR="experiments/" - python scripts/generate_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU + python scripts/generate_pred_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU # predict elif [ "$TASK" = "predict" ]; then @@ -147,7 +147,7 @@ elif [ "$TASK" = "e2e" ]; then GPU=$5 EXP_DIR="experiments/" - python scripts/generate_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU + python scripts/generate_pred_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU echo "--------------------------------" echo "4. 
Predict: " diff --git a/run.sh b/run.sh index 13b19e2..8be193f 100644 --- a/run.sh +++ b/run.sh @@ -12,7 +12,7 @@ if [ "$TASK" = "config" ]; then GPU=$3 - python scripts/generate_configs.py $TASK_DIR $CORPUS_NAME $GPU + python scripts/generate_pred_configs.py $TASK_DIR $CORPUS_NAME $GPU # predict elif [ "$TASK" = "predict" ]; then diff --git a/scripts/generate_pred_configs.py b/scripts/generate_pred_configs.py new file mode 100644 index 0000000..4e07162 --- /dev/null +++ b/scripts/generate_pred_configs.py @@ -0,0 +1,168 @@ +"""Generating configs for training and evaluating models.""" + +import os +import sys + +sys.path.insert(0, '.') +from utils import utils + + +def write_config(datapath, config): + """Write config to file""" + + with open(datapath, 'w') as outfile: + for key, value in config.items(): + + # format + if key == 'bert_model' or key == 'test_data' or key == 'ev_eval_script_path' or key == 'result_dir' or key == 'gpu': + outfile.write('\n') + + outfile.write('{}: {}'.format(key, value)) + outfile.write('\n') + + +def gen_predict_config(predict_config, specific_config, eval_set, config_dir, model_name, taskdir): + """For joint prediction""" + + # dev and test sets + if eval_set == 'dev' or eval_set == 'test': + predict_config['test_data'] = ''.join(["data/corpora/", model_name, "/", eval_set, "/"]) + predict_config['result_dir'] = ''.join([taskdir, '/predict-gold-', eval_set, '/']) + + # overwrite task config + overwrite_task_config(predict_config, specific_config) + + write_config(os.path.join(config_dir, ''.join(['predict-gold-', eval_set, '.yaml'])), predict_config) + + # for raw texts + elif eval_set == 'raw-text': + predict_config['test_data'] = ''.join(["data/processed-raw-text/", model_name, "/"]) + predict_config['result_dir'] = ''.join([taskdir, '/predict-', eval_set, '/']) + predict_config['raw_text'] = True + predict_config['ner_predict_all'] = True + + # overwrite task config + overwrite_task_config(predict_config, specific_config) + + write_config(os.path.join(config_dir, ''.join(['predict-', eval_set, '.yaml'])), predict_config) + + +def gen_predict_config_pubmed(predict_config, specific_config, config_dir, expdir, dataname): + predict_config['test_data'] = ''.join(["data/", dataname, "/processed-text/", "text/"]) + predict_config['result_dir'] = ''.join([expdir, dataname, '/results/']) + predict_config['raw_text'] = True + predict_config['ner_predict_all'] = True + + # overwrite task config + overwrite_task_config(predict_config, specific_config) + write_config(os.path.join(config_dir, ''.join(['predict-', dataname, '.yaml'])), predict_config) + + +def overwrite_task_config(config, specific_config): + """Overwrite config for specific task.""" + + # add specific task config + for key, value in specific_config.items(): + if key in config: + config[key] = value + + return config + + +def read_specific_config(task): + """Specific config for specific task.""" + + # open specific config + task_config_path = ''.join(['configs/', task, '.yaml']) + + specific_config = {} + + # check exist and read config + if os.path.exists(task_config_path): + with open(task_config_path, 'r') as stream: + specific_config = utils._ordered_load(stream) + + return specific_config + + +def generate_configs(taskdir, task, gpu): + """Generate configs for all.""" + + # create experiment dir + config_dir = os.path.join(taskdir, 'configs') + utils.makedir(config_dir) + + # default setting + default_config_path = 'configs/default.yaml' + with open(default_config_path, 'r') as stream: + 
default_config = utils._ordered_load(stream) + + # read config for specific task + specific_config = read_specific_config(task) + + # generate config for each task + task_config = default_config.copy() + task_config['gpu'] = gpu + task_config['task_name'] = task_config['task_name'].replace('cg', task) + task_config['model_path'] = task_config['model_path'].replace('cg', task) + task_config['saved_params'] = task_config['saved_params'].replace('cg', task) + task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', task) + + # predict config + predict_dev_config = task_config.copy() + gen_predict_config(predict_dev_config, specific_config, 'dev', config_dir, task, taskdir) + + predict_test_config = task_config.copy() + gen_predict_config(predict_test_config, specific_config, 'test', config_dir, task, taskdir) + + # for raw text + predict_test_config = task_config.copy() + gen_predict_config(predict_test_config, specific_config, 'raw-text', config_dir, task, taskdir) + + print('Generate configs: Done!') + + return + + +def generate_configs_pubmed(expdir, dataname, model_name, gpu): + """Generate configs for all.""" + + # create experiment dir + config_dir = os.path.join(expdir, ''.join([dataname, '/configs'])) + utils.makedir(config_dir) + + # default setting + default_config_path = 'configs/default.yaml' + with open(default_config_path, 'r') as stream: + default_config = utils._ordered_load(stream) + + # read config for specific task + specific_config = read_specific_config(model_name) + + # generate config for each task + task_config = default_config.copy() + task_config['gpu'] = gpu + task_config['task_name'] = task_config['task_name'].replace('cg', model_name) + task_config['model_path'] = task_config['model_path'].replace('cg', model_name) + task_config['saved_params'] = task_config['saved_params'].replace('cg', model_name) + task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', model_name) + + # for raw text + predict_test_config = task_config.copy() + gen_predict_config_pubmed(predict_test_config, specific_config, config_dir, expdir, dataname) + + print('Generate configs: Done!') + + return + + +if __name__ == '__main__': + # generate_configs_pubmed("experiments/", "cg", "my-pubmed", 0) + + # bionlp + if len(sys.argv) == 4: + generate_configs(sys.argv[1], sys.argv[2], sys.argv[3]) + + # pubmed + elif len(sys.argv) == 5: + generate_configs_pubmed(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) From 0103c80a09ed25eb3b03a5a45e4598cba63eb7fd Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:12:19 +0900 Subject: [PATCH 28/70] bionlp prediction configs --- configs/bio-default.yaml | 25 +++++++++++++++++++ pubmed.sh | 4 +-- run.sh | 2 +- ...red_configs.py => generate_bio_configs.py} | 4 +-- 4 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 configs/bio-default.yaml rename scripts/{generate_pred_configs.py => generate_bio_configs.py} (98%) diff --git a/configs/bio-default.yaml b/configs/bio-default.yaml new file mode 100644 index 0000000..67c56ca --- /dev/null +++ b/configs/bio-default.yaml @@ -0,0 +1,25 @@ +# Configuration file. + +task_name: cg + +# model +bert_model: data/bert/scibert_scivocab_cased +model_path: data/models/cg/model/ +saved_params: data/models/cg/cg.param + +# data +test_data: .. + +# eval +ev_eval_script_path: eval/scripts/eval-ev-cg.py +a2_entities: [] +raw_text: False +ner_predict_all: False + +# output +result_dir: .. 
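
> Editor's note: the defaults in `bio-default.yaml` are merged with per-task overrides by `overwrite_task_config` in the script above, which only replaces keys that already exist in the default config, so a task file cannot smuggle in unknown settings. A toy check of that merge rule (hypothetical values):

```python
from collections import OrderedDict

# default config as loaded from bio-default.yaml (toy subset)
default = OrderedDict(task_name="cg", gpu=-1, batchsize=16)
# per-task override, e.g. from configs/pc.yaml
specific = {"batchsize": 8, "brand_new_key": True}

for key, value in specific.items():
    if key in default:
        default[key] = value

assert default["batchsize"] == 8
assert "brand_new_key" not in default   # unknown keys are silently ignored
```
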
+ +# params +gpu: -1 +batchsize: 16 +seed: 42 \ No newline at end of file diff --git a/pubmed.sh b/pubmed.sh index 7a42ea9..569019d 100644 --- a/pubmed.sh +++ b/pubmed.sh @@ -52,7 +52,7 @@ elif [ "$TASK" = "config" ]; then GPU=$4 EXP_DIR="experiments/" - python scripts/generate_pred_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU + python scripts/generate_bio_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU # predict elif [ "$TASK" = "predict" ]; then @@ -147,7 +147,7 @@ elif [ "$TASK" = "e2e" ]; then GPU=$5 EXP_DIR="experiments/" - python scripts/generate_pred_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU + python scripts/generate_bio_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU echo "--------------------------------" echo "4. Predict: " diff --git a/run.sh b/run.sh index 8be193f..c089a95 100644 --- a/run.sh +++ b/run.sh @@ -12,7 +12,7 @@ if [ "$TASK" = "config" ]; then GPU=$3 - python scripts/generate_pred_configs.py $TASK_DIR $CORPUS_NAME $GPU + python scripts/generate_bio_configs.py $TASK_DIR $CORPUS_NAME $GPU # predict elif [ "$TASK" = "predict" ]; then diff --git a/scripts/generate_pred_configs.py b/scripts/generate_bio_configs.py similarity index 98% rename from scripts/generate_pred_configs.py rename to scripts/generate_bio_configs.py index 4e07162..7944174 100644 --- a/scripts/generate_pred_configs.py +++ b/scripts/generate_bio_configs.py @@ -93,7 +93,7 @@ def generate_configs(taskdir, task, gpu): utils.makedir(config_dir) # default setting - default_config_path = 'configs/default.yaml' + default_config_path = 'configs/bio-default.yaml' with open(default_config_path, 'r') as stream: default_config = utils._ordered_load(stream) @@ -132,7 +132,7 @@ def generate_configs_pubmed(expdir, dataname, model_name, gpu): utils.makedir(config_dir) # default setting - default_config_path = 'configs/default.yaml' + default_config_path = 'configs/bio-default.yaml' with open(default_config_path, 'r') as stream: default_config = utils._ordered_load(stream) From ba13b99f9d8a8c0bbc0c35685a12cb80008bbfa8 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:17:25 +0900 Subject: [PATCH 29/70] prediction path --- scripts/generate_bio_configs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py index 7944174..e4153dd 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_bio_configs.py @@ -27,7 +27,7 @@ def gen_predict_config(predict_config, specific_config, eval_set, config_dir, mo # dev and test sets if eval_set == 'dev' or eval_set == 'test': predict_config['test_data'] = ''.join(["data/corpora/", model_name, "/", eval_set, "/"]) - predict_config['result_dir'] = ''.join([taskdir, '/predict-gold-', eval_set, '/']) + predict_config['result_dir'] = ''.join([taskdir, 'deepem-bionlp', '/predict-gold-', eval_set, '/']) # overwrite task config overwrite_task_config(predict_config, specific_config) @@ -37,7 +37,7 @@ def gen_predict_config(predict_config, specific_config, eval_set, config_dir, mo # for raw texts elif eval_set == 'raw-text': predict_config['test_data'] = ''.join(["data/processed-raw-text/", model_name, "/"]) - predict_config['result_dir'] = ''.join([taskdir, '/predict-', eval_set, '/']) + predict_config['result_dir'] = ''.join([taskdir, 'deepem-bionlp', '/predict-', eval_set, '/']) predict_config['raw_text'] = True predict_config['ner_predict_all'] = True @@ -49,7 +49,7 @@ def gen_predict_config(predict_config, specific_config, eval_set, config_dir, mo def 
gen_predict_config_pubmed(predict_config, specific_config, config_dir, expdir, dataname): predict_config['test_data'] = ''.join(["data/", dataname, "/processed-text/", "text/"]) - predict_config['result_dir'] = ''.join([expdir, dataname, '/results/']) + predict_config['result_dir'] = ''.join([expdir, dataname, 'deepem-bionlp', '/results/']) predict_config['raw_text'] = True predict_config['ner_predict_all'] = True @@ -128,7 +128,7 @@ def generate_configs_pubmed(expdir, dataname, model_name, gpu): """Generate configs for all.""" # create experiment dir - config_dir = os.path.join(expdir, ''.join([dataname, '/configs'])) + config_dir = os.path.join(expdir, ''.join([dataname, 'deepem-bionlp', '/configs'])) utils.makedir(config_dir) # default setting From 3f98bcfdbab4dd83f4bb98acd9fd43478959786e Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:18:48 +0900 Subject: [PATCH 30/70] prediction path --- scripts/generate_bio_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py index e4153dd..2ed280e 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_bio_configs.py @@ -128,7 +128,7 @@ def generate_configs_pubmed(expdir, dataname, model_name, gpu): """Generate configs for all.""" # create experiment dir - config_dir = os.path.join(expdir, ''.join([dataname, 'deepem-bionlp', '/configs'])) + config_dir = os.path.join(expdir, ''.join([dataname, '/deepem-bionlp', '/configs'])) utils.makedir(config_dir) # default setting From 7b9116e7ed0c91ad4fd0f1709156178d4f284c67 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:23:31 +0900 Subject: [PATCH 31/70] prediction configs --- scripts/generate_bio_configs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py index 2ed280e..fde52ce 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_bio_configs.py @@ -89,7 +89,8 @@ def generate_configs(taskdir, task, gpu): """Generate configs for all.""" # create experiment dir - config_dir = os.path.join(taskdir, 'configs') + config_dir = os.path.join(taskdir, '/deepem-bionlp/configs') + utils.makedir(config_dir) # default setting From 3c63c93846cb6255a2225fc78bf6773721c919b1 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:26:08 +0900 Subject: [PATCH 32/70] fix bug --- scripts/generate_bio_configs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py index fde52ce..2dbb481 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_bio_configs.py @@ -89,8 +89,7 @@ def generate_configs(taskdir, task, gpu): """Generate configs for all.""" # create experiment dir - config_dir = os.path.join(taskdir, '/deepem-bionlp/configs') - + config_dir = os.path.join(taskdir, ''.join['/', 'deepem-bionlp', '/', 'configs']) utils.makedir(config_dir) # default setting From 0af84d6c62ac12ce3c4c3bc52f3b4925e6f45f97 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:27:34 +0900 Subject: [PATCH 33/70] fix bug --- scripts/generate_bio_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py index 2dbb481..61f0807 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_bio_configs.py @@ -89,7 +89,7 @@ def generate_configs(taskdir, task, gpu): """Generate configs for all.""" # create experiment dir - 
config_dir = os.path.join(taskdir, ''.join['/', 'deepem-bionlp', '/', 'configs']) + config_dir = os.path.join(taskdir, ''.join['deepem-bionlp', '/', 'configs']) utils.makedir(config_dir) # default setting From fed491a2f8066d240c01b488ed5769e344110aeb Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:30:06 +0900 Subject: [PATCH 34/70] fix bug --- scripts/generate_bio_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py index 61f0807..08f6a2d 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_bio_configs.py @@ -89,7 +89,7 @@ def generate_configs(taskdir, task, gpu): """Generate configs for all.""" # create experiment dir - config_dir = os.path.join(taskdir, ''.join['deepem-bionlp', '/', 'configs']) + config_dir = os.path.join(os.path.join(taskdir, 'deepem-bionlp'), 'configs') utils.makedir(config_dir) # default setting From 0d453c4c712839f9b9d72e340c3b7391e36e5d1d Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:31:43 +0900 Subject: [PATCH 35/70] fix path --- scripts/generate_bio_configs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py index 08f6a2d..6a584e1 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_bio_configs.py @@ -128,7 +128,8 @@ def generate_configs_pubmed(expdir, dataname, model_name, gpu): """Generate configs for all.""" # create experiment dir - config_dir = os.path.join(expdir, ''.join([dataname, '/deepem-bionlp', '/configs'])) + config_dir = os.path.join(os.path.join(expdir, dataname), 'deepem-bionlp') + config_dir = os.path.join(config_dir, 'configs') utils.makedir(config_dir) # default setting From 1fa3839af70b81c05166f443eccb41633ba51f00 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:34:02 +0900 Subject: [PATCH 36/70] output path --- scripts/generate_bio_configs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py index 6a584e1..b0291e4 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_bio_configs.py @@ -27,7 +27,7 @@ def gen_predict_config(predict_config, specific_config, eval_set, config_dir, mo # dev and test sets if eval_set == 'dev' or eval_set == 'test': predict_config['test_data'] = ''.join(["data/corpora/", model_name, "/", eval_set, "/"]) - predict_config['result_dir'] = ''.join([taskdir, 'deepem-bionlp', '/predict-gold-', eval_set, '/']) + predict_config['result_dir'] = ''.join([taskdir, '/deepem-bionlp', '/predict-gold-', eval_set, '/']) # overwrite task config overwrite_task_config(predict_config, specific_config) @@ -37,7 +37,7 @@ def gen_predict_config(predict_config, specific_config, eval_set, config_dir, mo # for raw texts elif eval_set == 'raw-text': predict_config['test_data'] = ''.join(["data/processed-raw-text/", model_name, "/"]) - predict_config['result_dir'] = ''.join([taskdir, 'deepem-bionlp', '/predict-', eval_set, '/']) + predict_config['result_dir'] = ''.join([taskdir, '/deepem-bionlp', '/predict-', eval_set, '/']) predict_config['raw_text'] = True predict_config['ner_predict_all'] = True @@ -49,7 +49,7 @@ def gen_predict_config(predict_config, specific_config, eval_set, config_dir, mo def gen_predict_config_pubmed(predict_config, specific_config, config_dir, expdir, dataname): predict_config['test_data'] = ''.join(["data/", dataname, "/processed-text/", "text/"]) - 
predict_config['result_dir'] = ''.join([expdir, dataname, 'deepem-bionlp', '/results/'])
+    predict_config['result_dir'] = ''.join([expdir, dataname, '/deepem-bionlp', '/results/'])
     predict_config['raw_text'] = True
     predict_config['ner_predict_all'] = True
 

From 5495e835bb7dd5950e0ae69246579f6b23cff16f Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 00:35:35 +0900
Subject: [PATCH 37/70] update config path

---
 run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run.sh b/run.sh
index c089a95..d420589 100644
--- a/run.sh
+++ b/run.sh
@@ -22,7 +22,7 @@ elif [ "$TASK" = "predict" ]; then
     DEV_TEST=$4 # predict for dev, test sets
 
     # predict
-    python predict.py --yaml $TASK_DIR/configs/$TASK-$GOLD_E2E-$DEV_TEST.yaml
+    python predict.py --yaml $TASK_DIR/deepem-bionlp/configs/$TASK-$GOLD_E2E-$DEV_TEST.yaml
 
 # retrieve offset
 elif [ "$TASK" = "offset" ]; then

From 1c41fda78f505c7f68c8213237ba363f14fc6fe7 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 00:42:16 +0900
Subject: [PATCH 38/70] load saved parameters

---
 configs/bio-default.yaml | 2 +-
 predict.py               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/bio-default.yaml b/configs/bio-default.yaml
index 67c56ca..d50278a 100644
--- a/configs/bio-default.yaml
+++ b/configs/bio-default.yaml
@@ -5,7 +5,7 @@ task_name: cg
 # model
 bert_model: data/bert/scibert_scivocab_cased
 model_path: data/models/cg/model/
-saved_params: data/models/cg/cg.param
+params: data/models/cg/cg.param
 
 # data
 test_data: ..
diff --git a/predict.py b/predict.py
index d237630..7552a6a 100644
--- a/predict.py
+++ b/predict.py
@@ -44,7 +44,6 @@ def main():
 
     # Load configurations for prediction only
     test_data_dir = parameters['test_data']
-    params_dir = parameters['params']
     pipelines = parameters['pipelines']
     t_gpu = parameters['t_gpu']
     t_fp16 = parameters['t_fp16']
@@ -59,6 +58,7 @@
 
     bert_model = parameters['bert_model']
 
     # Load pre-trained parameters
+    params_dir = parameters['params']
     with open(params_dir, "rb") as f:
         parameters = pickle.load(f)

From a21989c5c387c9f2defbe65f3c7a860b3adba8de Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 00:43:27 +0900
Subject: [PATCH 39/70] saved params config

---
 scripts/generate_bio_configs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/generate_bio_configs.py b/scripts/generate_bio_configs.py
index b0291e4..7f18420 100644
--- a/scripts/generate_bio_configs.py
+++ b/scripts/generate_bio_configs.py
@@ -105,7 +105,7 @@ def generate_configs(taskdir, task, gpu):
     task_config['gpu'] = gpu
     task_config['task_name'] = task_config['task_name'].replace('cg', task)
     task_config['model_path'] = task_config['model_path'].replace('cg', task)
-    task_config['saved_params'] = task_config['saved_params'].replace('cg', task)
+    task_config['params'] = task_config['params'].replace('cg', task)
     task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', task)
 
     # predict config
@@ -145,7 +145,7 @@ def generate_configs_pubmed(expdir, dataname, model_name, gpu):
     task_config['gpu'] = gpu
     task_config['task_name'] = task_config['task_name'].replace('cg', model_name)
     task_config['model_path'] = task_config['model_path'].replace('cg', model_name)
-    task_config['saved_params'] = task_config['saved_params'].replace('cg', model_name)
+    task_config['params'] = task_config['params'].replace('cg', model_name)
     task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', model_name)
 
     # for raw text

From 
19ceca75ed80e36cc9a1ca0790140c94d3762302 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:51:02 +0900 Subject: [PATCH 40/70] prediction scripts --- predict.py | 2 +- pubmed.sh | 8 +- run.sh | 4 +- ...bio_configs.py => generate_configs_bio.py} | 4 +- scripts/predict_bio.py | 114 ++++++++++++++++++ 5 files changed, 123 insertions(+), 9 deletions(-) rename scripts/{generate_bio_configs.py => generate_configs_bio.py} (97%) create mode 100644 scripts/predict_bio.py diff --git a/predict.py b/predict.py index 7552a6a..d237630 100644 --- a/predict.py +++ b/predict.py @@ -44,6 +44,7 @@ def main(): # Load configurations for prediction only test_data_dir = parameters['test_data'] + params_dir = parameters['params'] pipelines = parameters['pipelines'] t_gpu = parameters['t_gpu'] t_fp16 = parameters['t_fp16'] @@ -58,7 +59,6 @@ def main(): bert_model = parameters['bert_model'] # Load pre-trained parameters - params_dir = parameters['params'] with open(params_dir, "rb") as f: parameters = pickle.load(f) diff --git a/pubmed.sh b/pubmed.sh index 569019d..e9015a6 100644 --- a/pubmed.sh +++ b/pubmed.sh @@ -52,7 +52,7 @@ elif [ "$TASK" = "config" ]; then GPU=$4 EXP_DIR="experiments/" - python scripts/generate_bio_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU + python scripts/generate_configs_bio.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU # predict elif [ "$TASK" = "predict" ]; then @@ -62,7 +62,7 @@ elif [ "$TASK" = "predict" ]; then EXP_DIR="experiments/$MY_DATA" # predict - python predict.py --yaml $EXP_DIR/configs/$TASK-$MY_DATA.yaml + python scripts/predict_bio.py --yaml $EXP_DIR/configs/$TASK-$MY_DATA.yaml # retrieve offset elif [ "$TASK" = "offset" ]; then @@ -147,7 +147,7 @@ elif [ "$TASK" = "e2e" ]; then GPU=$5 EXP_DIR="experiments/" - python scripts/generate_bio_configs.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU + python scripts/generate_configs_bio.py $EXP_DIR $MY_DATA $MODEL_NAME $GPU echo "--------------------------------" echo "4. Predict: " @@ -155,7 +155,7 @@ elif [ "$TASK" = "e2e" ]; then EXP_DIR="experiments/$MY_DATA" # predict - python predict.py --yaml $EXP_DIR/configs/predict-$MY_DATA.yaml + python scripts/predict_bio.py --yaml $EXP_DIR/configs/predict-$MY_DATA.yaml echo "--------------------------------" echo "5. 
Retrieve original offsets: " diff --git a/run.sh b/run.sh index d420589..6227d6f 100644 --- a/run.sh +++ b/run.sh @@ -12,7 +12,7 @@ if [ "$TASK" = "config" ]; then GPU=$3 - python scripts/generate_bio_configs.py $TASK_DIR $CORPUS_NAME $GPU + python scripts/generate_configs_bio.py $TASK_DIR $CORPUS_NAME $GPU # predict elif [ "$TASK" = "predict" ]; then @@ -22,7 +22,7 @@ elif [ "$TASK" = "predict" ]; then DEV_TEST=$4 # predict for dev, test sets # predict - python predict.py --yaml $TASK_DIR/deepem-bionlp/configs/$TASK-$GOLD_E2E-$DEV_TEST.yaml + python scripts/predict_bio.py --yaml $TASK_DIR/deepem-bionlp/configs/$TASK-$GOLD_E2E-$DEV_TEST.yaml # retrieve offset elif [ "$TASK" = "offset" ]; then diff --git a/scripts/generate_bio_configs.py b/scripts/generate_configs_bio.py similarity index 97% rename from scripts/generate_bio_configs.py rename to scripts/generate_configs_bio.py index 7f18420..b0291e4 100644 --- a/scripts/generate_bio_configs.py +++ b/scripts/generate_configs_bio.py @@ -105,7 +105,7 @@ def generate_configs(taskdir, task, gpu): task_config['gpu'] = gpu task_config['task_name'] = task_config['task_name'].replace('cg', task) task_config['model_path'] = task_config['model_path'].replace('cg', task) - task_config['params'] = task_config['params'].replace('cg', task) + task_config['saved_params'] = task_config['saved_params'].replace('cg', task) task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', task) # predict config @@ -145,7 +145,7 @@ def generate_configs_pubmed(expdir, dataname, model_name, gpu): task_config['gpu'] = gpu task_config['task_name'] = task_config['task_name'].replace('cg', model_name) task_config['model_path'] = task_config['model_path'].replace('cg', model_name) - task_config['params'] = task_config['params'].replace('cg', model_name) + task_config['saved_params'] = task_config['saved_params'].replace('cg', model_name) task_config['ev_eval_script_path'] = task_config['ev_eval_script_path'].replace('cg', model_name) # for raw text diff --git a/scripts/predict_bio.py b/scripts/predict_bio.py new file mode 100644 index 0000000..fc67343 --- /dev/null +++ b/scripts/predict_bio.py @@ -0,0 +1,114 @@ +import os +import random +import pickle +import numpy as np +import torch +from torch.utils.data import TensorDataset, DataLoader, SequentialSampler + +from eval.evaluate import predict + +from nets import deepEM +from loader.prepData import prepdata +from loader.prepNN import prep4nn +from utils import utils + + +def main(): + # read predict config + # set config path by command line + inp_args = utils._parsing() + config_path = getattr(inp_args, 'yaml') + + # set config path manually + # config_path = 'configs/debug.yaml' + + with open(config_path, 'r') as stream: + pred_params = utils._ordered_load(stream) + + # Fix seed for reproducibility + os.environ["PYTHONHASHSEED"] = str(pred_params['seed']) + random.seed(pred_params['seed']) + np.random.seed(pred_params['seed']) + torch.manual_seed(pred_params['seed']) + + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + # Load pre-trained parameters + with open(pred_params['saved_params'], "rb") as f: + parameters = pickle.load(f) + + parameters['predict'] = True + + # Set predict settings value for params + parameters['gpu'] = pred_params['gpu'] + parameters['batchsize'] = pred_params['batchsize'] + if parameters['gpu'] >= 0: + device = torch.device("cuda:" + str(parameters['gpu']) if torch.cuda.is_available() else "cpu") + 
torch.cuda.set_device(parameters['gpu']) + else: + device = torch.device("cpu") + parameters['device'] = device + + # Set evaluation settings + parameters['test_data'] = pred_params['test_data'] + + parameters['bert_model'] = pred_params['bert_model'] + + result_dir = pred_params['result_dir'] + if not os.path.exists(result_dir): + os.makedirs(result_dir) + + parameters['result_dir'] = pred_params['result_dir'] + + # raw text + parameters['raw_text'] = pred_params['raw_text'] + parameters['ner_predict_all'] = pred_params['raw_text'] + parameters['a2_entities'] = pred_params['a2_entities'] + + # process data + test_data = prepdata.prep_input_data(pred_params['test_data'], parameters) + nntest_data, test_dataloader = read_test_data(test_data, parameters) + + # model + deepee_model = deepEM.DeepEM(parameters) + + model_path = pred_params['model_path'] + + # Load all models + utils.handle_checkpoints(model=deepee_model, + checkpoint_dir=model_path, + params={ + 'device': device + }, + resume=True) + + deepee_model.to(device) + + predict(model=deepee_model, + result_dir=result_dir, + eval_dataloader=test_dataloader, + eval_data=nntest_data, + g_entity_ids_=test_data['g_entity_ids_'], + params=parameters) + + # print('Done!') + + +def read_test_data(test_data, params): + test = prep4nn.data2network(test_data, 'predict', params) + + if len(test) == 0: + raise ValueError("Test set empty.") + + test_data = prep4nn.torch_data_2_network(cdata2network=test, params=params, do_get_nn_data=True) + te_data_size = len(test_data['nn_data']['ids']) + + test_data_ids = TensorDataset(torch.arange(te_data_size)) + test_sampler = SequentialSampler(test_data_ids) + test_dataloader = DataLoader(test_data_ids, sampler=test_sampler, batch_size=params['batchsize']) + return test_data, test_dataloader + + +if __name__ == '__main__': + main() From d66972ef00487a763c46e1d8533c4d43643cc46c Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:51:50 +0900 Subject: [PATCH 41/70] fix bug --- configs/bio-default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/bio-default.yaml b/configs/bio-default.yaml index d50278a..67c56ca 100644 --- a/configs/bio-default.yaml +++ b/configs/bio-default.yaml @@ -5,7 +5,7 @@ task_name: cg # model bert_model: data/bert/scibert_scivocab_cased model_path: data/models/cg/model/ -params: data/models/cg/cg.param +saved_params: data/models/cg/cg.param # data test_data: .. 
From 7a875077b68ed90b5731176a5e96966ededd4352 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:53:10 +0900 Subject: [PATCH 42/70] python path --- pubmed.sh | 4 ++++ run.sh | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/pubmed.sh b/pubmed.sh index e9015a6..76e3549 100644 --- a/pubmed.sh +++ b/pubmed.sh @@ -1,5 +1,9 @@ #!/bin/bash +ROOT=$PWD +export PYTHONPATH="${PYTHONPATH}:$ROOT" +export PYTHONPATH="${PYTHONPATH}:$ROOT/eval" + TASK=$1 # Get Text from PubMed ID & PMC ID diff --git a/run.sh b/run.sh index 6227d6f..d6e57a8 100644 --- a/run.sh +++ b/run.sh @@ -1,5 +1,9 @@ #!/bin/bash +ROOT=$PWD +export PYTHONPATH="${PYTHONPATH}:$ROOT" +export PYTHONPATH="${PYTHONPATH}:$ROOT/eval" + TASK=$1 # generate configs, predict, retrieve offsets, evaluate CORPUS_NAME=$2 # cg, ge11, pc, etc From babfeb31db7fd20470d67fa22349631cad703ca3 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:55:44 +0900 Subject: [PATCH 43/70] bionlp prediction --- eval/evaluate.py | 164 +++++++++++++++++++++++++++++++++++++++++ scripts/predict_bio.py | 4 +- 2 files changed, 166 insertions(+), 2 deletions(-) diff --git a/eval/evaluate.py b/eval/evaluate.py index 7033e98..f3a01c2 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -504,3 +504,167 @@ def print_scores(k, v, stoso): v['macro'][stoso + '_p'], v['macro'][stoso + '_r'], v['macro'][stoso + '_f']), end="", ) print() + +def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params): + mapping_id_tag = params['mappings']['nn_mapping']['id_tag_mapping'] + + # store predicted entities + ent_preds = [] + + # store predicted events + ev_preds = [] + + fidss, wordss, offsetss, sub_to_wordss, span_indicess = [], [], [], [], [] + + # entity and relation output + ent_anns = [] + rel_anns = [] + + # Evaluation phase + model.eval() + + all_ner_preds, all_ner_golds, all_ner_terms = [], [], [] + + is_eval_ev = False + + for step, batch in enumerate( + tqdm(eval_dataloader, desc="Iteration", leave=False) + ): + eval_data_ids = batch + tensors = utils.get_tensors(eval_data_ids, eval_data, params) + + nn_tokens, nn_ids, nn_token_mask, nn_attention_mask, nn_span_indices, nn_span_labels, nn_span_labels_match_rel, nn_entity_masks, nn_trigger_masks, _, \ + etypes, _ = tensors + + fids = [ + eval_data["fids"][data_id] for data_id in eval_data_ids[0].tolist() + ] + offsets = [ + eval_data["offsets"][data_id] + for data_id in eval_data_ids[0].tolist() + ] + words = [ + eval_data["words"][data_id] for data_id in eval_data_ids[0].tolist() + ] + sub_to_words = [ + eval_data["sub_to_words"][data_id] + for data_id in eval_data_ids[0].tolist() + ] + subwords = [ + eval_data["subwords"][data_id] + for data_id in eval_data_ids[0].tolist() + ] + gold_entities = [ + eval_data["entities"][data_id] + for data_id in eval_data_ids[0].tolist() + ] + + with torch.no_grad(): + ner_out, rel_out, ev_out = model(tensors, params) + + ner_preds = ner_out['preds'] + + ner_terms = ner_out['terms'] + + all_ner_terms.append(ner_terms) + + for sentence_idx, ner_pred in enumerate(ner_preds): + all_ner_golds.append( + [ + ( + sub_to_words[sentence_idx][span_start], + sub_to_words[sentence_idx][span_end], + mapping_id_tag[label_id], + ) + for ( + span_start, + span_end, + ), label_ids in gold_entities[sentence_idx].items() + for label_id in label_ids + ] + ) + + pred_entities = [] + for span_id, ner_pred_id in enumerate(ner_pred): + span_start, span_end = nn_span_indices[sentence_idx][span_id] + span_start, span_end = span_start.item(), span_end.item() + if 
(ner_pred_id > 0 + and span_start in sub_to_words[sentence_idx] + and span_end in sub_to_words[sentence_idx] + ): + pred_entities.append( + ( + sub_to_words[sentence_idx][span_start], + sub_to_words[sentence_idx][span_end], + mapping_id_tag[ner_pred_id], + ) + ) + all_ner_preds.append(pred_entities) + + # entity prediction + ent_ann = {'span_indices': nn_span_indices, 'ner_preds': ner_out['preds'], 'words': words, + 'offsets': offsets, 'sub_to_words': sub_to_words, 'subwords': subwords, + 'ner_terms': ner_terms} + ent_anns.append(ent_ann) + + fidss.append(fids) + + wordss.append(words) + offsetss.append(offsets) + sub_to_wordss.append(sub_to_words) + + # relation prediction + if rel_out != None: + pairs_idx = rel_out['pairs_idx'] + rel_pred = rel_out['preds'] + + rel_ann = {'pairs_idx': pairs_idx, 'rel_preds': rel_pred} + rel_anns.append(rel_ann) + else: + rel_anns.append({}) + + # event prediction + if ev_out != None: + # add predicted entity + ent_preds.append(ner_out["nner_preds"]) + + # add predicted events + ev_preds.append(ev_out) + + span_indicess.append( + [ + indice.detach().cpu().numpy() + for indice in ner_out["span_indices"] + ] + ) + is_eval_ev = True + else: + ent_preds.append([]) + ev_preds.append([]) + + span_indicess.append([]) + + # Clear GPU unused RAM: + if params['gpu'] >= 0: + torch.cuda.empty_cache() + # write entity and relation prediction + _ = write_entity_relations( + result_dir=result_dir, + fidss=fidss, + ent_anns=ent_anns, + rel_anns=rel_anns, + params=params + ) + + if is_eval_ev > 0: + write_events(fids=fidss, + all_ent_preds=ent_preds, + all_words=wordss, + all_offsets=offsetss, + all_span_terms=all_ner_terms, + all_span_indices=span_indicess, + all_sub_to_words=sub_to_wordss, + all_ev_preds=ev_preds, + g_entity_ids_=g_entity_ids_, + params=params, + result_dir=result_dir) \ No newline at end of file diff --git a/scripts/predict_bio.py b/scripts/predict_bio.py index fc67343..065845a 100644 --- a/scripts/predict_bio.py +++ b/scripts/predict_bio.py @@ -5,7 +5,7 @@ import torch from torch.utils.data import TensorDataset, DataLoader, SequentialSampler -from eval.evaluate import predict +from eval.evaluate import predict_bio from nets import deepEM from loader.prepData import prepdata @@ -85,7 +85,7 @@ def main(): deepee_model.to(device) - predict(model=deepee_model, + predict_bio(model=deepee_model, result_dir=result_dir, eval_dataloader=test_dataloader, eval_data=nntest_data, From c44d387a19b99c4776b24436e0887cb4445dcd88 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:56:18 +0900 Subject: [PATCH 44/70] fix path --- scripts/predict_bio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/predict_bio.py b/scripts/predict_bio.py index 065845a..76ad8d4 100644 --- a/scripts/predict_bio.py +++ b/scripts/predict_bio.py @@ -7,7 +7,7 @@ from eval.evaluate import predict_bio -from nets import deepEM +from model import deepEM from loader.prepData import prepdata from loader.prepNN import prep4nn from utils import utils From 246a1056141dd34f6855d2a565018b08bfa1292d Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:57:52 +0900 Subject: [PATCH 45/70] setup --- README.md | 2 +- requirements.txt => setup/requirements.txt | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename requirements.txt => setup/requirements.txt (100%) diff --git a/README.md b/README.md index 85296a6..4111ed0 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ sh setup/conda-install.sh - Python dependencies ```bash -pip install -r 
requirements.txt +pip install -r setup/requirements.txt ``` 5. [Brat](https://github.com/nlplab/brat) for visualization diff --git a/requirements.txt b/setup/requirements.txt similarity index 100% rename from requirements.txt rename to setup/requirements.txt From 5eec9122a805851e9936fbb6f29e1254d13ca5f7 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 00:59:00 +0900 Subject: [PATCH 46/70] sklearn version --- setup/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/requirements.txt b/setup/requirements.txt index d31bf44..b84c69c 100644 --- a/setup/requirements.txt +++ b/setup/requirements.txt @@ -7,7 +7,7 @@ loguru tabulate pyyaml texttable -sklearn +sklearn==0.23.2 pytorch-nlp tqdm requests From 7018136da5539b0c5a2a0e4507b423799492081f Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:00:55 +0900 Subject: [PATCH 47/70] sklearn --- setup/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/requirements.txt b/setup/requirements.txt index b84c69c..9fc92d4 100644 --- a/setup/requirements.txt +++ b/setup/requirements.txt @@ -7,7 +7,7 @@ loguru tabulate pyyaml texttable -sklearn==0.23.2 +scikit-learn==0.23.2 pytorch-nlp tqdm requests From c803f2c708c167838b26b0bac6f3ee82cc7e124b Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:04:26 +0900 Subject: [PATCH 48/70] fix bug --- scripts/predict_bio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/predict_bio.py b/scripts/predict_bio.py index 76ad8d4..afc6544 100644 --- a/scripts/predict_bio.py +++ b/scripts/predict_bio.py @@ -96,7 +96,7 @@ def main(): def read_test_data(test_data, params): - test = prep4nn.data2network(test_data, 'predict', params) + test, _ = prep4nn.data2network(test_data, 'predict', params) if len(test) == 0: raise ValueError("Test set empty.") From 12205e394eb1957e400bb0240ba77c6125d391c9 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:06:32 +0900 Subject: [PATCH 49/70] fix bug --- scripts/predict_bio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/predict_bio.py b/scripts/predict_bio.py index afc6544..5f318b0 100644 --- a/scripts/predict_bio.py +++ b/scripts/predict_bio.py @@ -96,12 +96,12 @@ def main(): def read_test_data(test_data, params): - test, _ = prep4nn.data2network(test_data, 'predict', params) + test, test_events_map = prep4nn.data2network(test_data, 'predict', params) if len(test) == 0: raise ValueError("Test set empty.") - test_data = prep4nn.torch_data_2_network(cdata2network=test, params=params, do_get_nn_data=True) + test_data = prep4nn.torch_data_2_network(cdata2network=test, events_map=test_events_map, params=params, do_get_nn_data=True) te_data_size = len(test_data['nn_data']['ids']) test_data_ids = TensorDataset(torch.arange(te_data_size)) From d740223002d7c6bbccc696afbebd14d9d308c608 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:10:31 +0900 Subject: [PATCH 50/70] process input --- loader/prepData/prepdata.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/loader/prepData/prepdata.py b/loader/prepData/prepdata.py index cb5a6f0..d0f222d 100644 --- a/loader/prepData/prepdata.py +++ b/loader/prepData/prepdata.py @@ -51,5 +51,18 @@ def prep_input_data(files_fold, params): if diff: print(doc_name, sorted(diff, key=lambda _id: int(_id.replace("T", "")))) + # entity indices + g_entity_ids_ = OrderedDict() + for fid, fdata in entities0.items(): + # get max entity id + eid_ 
= [eid for eid in fdata['ids'] if not eid.startswith('TR')] + ids_ = [int(eid.replace('T', '')) for eid in eid_] + if len(ids_) > 0: + max_id = max(ids_) + else: + max_id = 0 + eid_.append(max_id) + g_entity_ids_[fid] = eid_ + return {'entities': entities1, 'triggers': triggers1, 'terms': terms0, 'relations': relations0, 'events': events0, - 'sentences': sentences1, 'input': input1, 'structsTR': structsTR} + 'sentences': sentences1, 'input': input1, 'structsTR': structsTR, 'g_entity_ids_': g_entity_ids_} From 9666202b7b36f84e2e10ca9013c467d1ef607de4 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:11:14 +0900 Subject: [PATCH 51/70] fix bug --- loader/prepData/prepdata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loader/prepData/prepdata.py b/loader/prepData/prepdata.py index d0f222d..8204a2e 100644 --- a/loader/prepData/prepdata.py +++ b/loader/prepData/prepdata.py @@ -1,4 +1,5 @@ """Load data from brat format and process for entity, trigger, relation, events.""" +from collections import OrderedDict from loader.prepData.brat import brat_loader from loader.prepData.sentence import prep_sentence_offsets, process_input From e3b676321f778e4926f04e9f551476f1d7d82b65 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:15:27 +0900 Subject: [PATCH 52/70] fix bug --- eval/evaluate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eval/evaluate.py b/eval/evaluate.py index f3a01c2..2ea8115 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -533,8 +533,8 @@ def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, pa eval_data_ids = batch tensors = utils.get_tensors(eval_data_ids, eval_data, params) - nn_tokens, nn_ids, nn_token_mask, nn_attention_mask, nn_span_indices, nn_span_labels, nn_span_labels_match_rel, nn_entity_masks, nn_trigger_masks, _, \ - etypes, _ = tensors + nn_tokens, nn_ids, nn_token_mask, nn_attention_mask, nn_span_indices, nn_span_labels, nn_span_labels_match_rel, nn_entity_masks, nn_trigger_masks, nn_gtruth, nn_l2r, _, \ + nn_truth_ev, nn_ev_idxs, ev_lbls, etypes, _ = tensors fids = [ eval_data["fids"][data_id] for data_id in eval_data_ids[0].tolist() From a4c5deaa5c701a110e2bd4336a0f0834b2dd439c Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:21:13 +0900 Subject: [PATCH 53/70] nested events in prediction --- model/EVNet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/model/EVNet.py b/model/EVNet.py index 440b3b2..0fc2970 100644 --- a/model/EVNet.py +++ b/model/EVNet.py @@ -638,10 +638,10 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch): all_preds_output = [] # flag to train nested event or not - if n_epoch >= self.params['ev_nested_epoch'] or self.params['predict']: - enable_nested_ev = True - else: - enable_nested_ev = False + enable_nested_ev = True + if not self.params['predict']: + if n_epoch < self.params['ev_nested_epoch']: + enable_nested_ev = False # flag to train modality or not if n_epoch >= self.params['modality_epoch'] or self.params['predict']: From f879ca746c258dce3c8696c413f95c96f6c5de77 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:23:42 +0900 Subject: [PATCH 54/70] modality in prediction --- model/EVNet.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/model/EVNet.py b/model/EVNet.py index 0fc2970..5ba06d2 100644 --- a/model/EVNet.py +++ b/model/EVNet.py @@ -637,17 +637,14 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch): # 
store output all_preds_output = [] - # flag to train nested event or not + # flag to train nested event, train modality or not enable_nested_ev = True + enable_modality = True if not self.params['predict']: if n_epoch < self.params['ev_nested_epoch']: enable_nested_ev = False - - # flag to train modality or not - if n_epoch >= self.params['modality_epoch'] or self.params['predict']: - enable_modality = True - else: - enable_modality = False + if n_epoch < self.params['modality_epoch']: + enable_modality = False # store all predictions for flat and nested, maximum as 3 nested levels # TODO: revise the maximum nested level later. Now fix 3 levels From 2eecdb167c692609cc77516d147d524ea5b75a2d Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:25:51 +0900 Subject: [PATCH 55/70] fix bug --- eval/evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eval/evaluate.py b/eval/evaluate.py index 2ea8115..344cd8b 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -560,7 +560,7 @@ def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, pa ] with torch.no_grad(): - ner_out, rel_out, ev_out = model(tensors, params) + ner_out, rel_out, ev_out, _ = model(tensors, params) ner_preds = ner_out['preds'] From 98f5fe8d4b03b7e20a4619131d63ce173130dfce Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:35:00 +0900 Subject: [PATCH 56/70] write annotations --- eval/evaluate.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/eval/evaluate.py b/eval/evaluate.py index 344cd8b..783dc4b 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -4,7 +4,7 @@ from tqdm import tqdm from eval.evalEV import evaluate_ev -from eval.evalRE import estimate_perf, estimate_rel +from eval.evalRE import estimate_perf, estimate_rel, gen_annotation from eval.evalNER import eval_nner from scripts.pipeline_process import gen_ner_ann_files, gen_rel_ann_files from utils import utils @@ -648,13 +648,14 @@ def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, pa if params['gpu'] >= 0: torch.cuda.empty_cache() # write entity and relation prediction - _ = write_entity_relations( - result_dir=result_dir, - fidss=fidss, - ent_anns=ent_anns, - rel_anns=rel_anns, - params=params - ) + gen_annotation(fidss, ent_anns, rel_anns, params, result_dir) + # _ = write_entity_relations( + # result_dir=result_dir, + # fidss=fidss, + # ent_anns=ent_anns, + # rel_anns=rel_anns, + # params=params + # ) if is_eval_ev > 0: write_events(fids=fidss, From 4c913d0c5bc4acfa985bcfa2f2b5a5591f40faeb Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:43:02 +0900 Subject: [PATCH 57/70] write output --- eval/evaluate.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/eval/evaluate.py b/eval/evaluate.py index 783dc4b..eb093e0 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -505,6 +505,7 @@ def print_scores(k, v, stoso): ) print() + def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params): mapping_id_tag = params['mappings']['nn_mapping']['id_tag_mapping'] @@ -658,14 +659,26 @@ def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, pa # ) if is_eval_ev > 0: - write_events(fids=fidss, - all_ent_preds=ent_preds, - all_words=wordss, - all_offsets=offsetss, - all_span_terms=all_ner_terms, - all_span_indices=span_indicess, - all_sub_to_words=sub_to_wordss, - all_ev_preds=ev_preds, - g_entity_ids_=g_entity_ids_, - 
params=params, - result_dir=result_dir) \ No newline at end of file + # write_events(fids=fidss, + # all_ent_preds=ent_preds, + # all_words=wordss, + # all_offsets=offsetss, + # all_span_terms=all_ner_terms, + # all_span_indices=span_indicess, + # all_sub_to_words=sub_to_wordss, + # all_ev_preds=ev_preds, + # g_entity_ids_=g_entity_ids_, + # params=params, + # result_dir=result_dir) + + _ = evaluate_ev(fids=fidss, + all_ent_preds=ent_preds, + all_words=wordss, + all_offsets=offsetss, + all_span_terms=all_ner_terms, + all_span_indices=span_indicess, + all_sub_to_words=sub_to_wordss, + all_ev_preds=ev_preds, + params=params, + gold_dir=eval_dir, + result_dir=result_dir) From 1ff214595bc458b996a03090152468dc5ab6f90c Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 01:51:03 +0900 Subject: [PATCH 58/70] fix bug --- eval/evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eval/evaluate.py b/eval/evaluate.py index eb093e0..e80ce6e 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -680,5 +680,5 @@ def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, pa all_sub_to_words=sub_to_wordss, all_ev_preds=ev_preds, params=params, - gold_dir=eval_dir, + gold_dir=eval_data, result_dir=result_dir) From aad6562507b0d0fc5897765eaf6e6533cd69d8fb Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 02:06:06 +0900 Subject: [PATCH 59/70] write output for prediction --- eval/evalEV.py | 192 +++++++++++++++++++++++++++++++++++++++++++++++ eval/evalRE.py | 173 ++++++++++++++++++++++++++++++++++++++++++ eval/evaluate.py | 53 +++++-------- 3 files changed, 385 insertions(+), 33 deletions(-) diff --git a/eval/evalEV.py b/eval/evalEV.py index 7590b4a..7cea518 100644 --- a/eval/evalEV.py +++ b/eval/evalEV.py @@ -500,3 +500,195 @@ def extract_fscore(path): return {'sub_scores': (float(sub_precision.strip()), float(sub_recall.strip()), float(sub_fscore.strip())), 'mod_scores': (float(mod_precision.strip()), float(mod_recall.strip()), float(mod_fscore.strip())), 'tot_scores': (float(tot_precision.strip()), float(tot_recall.strip()), float(tot_fscore.strip()))} + +# write events to file +def write_ev_2file_bio(pred_output, pred_ents, result_dir, g_entity_ids_, params): + a2dir = result_dir + 'ev-last/ev-tok-a2/' + anndir = result_dir + 'ev-last/ev-tok-ann/' + rev_type_map = params['mappings']['rev_type_map'] + + # entity id mapping + # feid_mapping = collections.OrderedDict() + + if not os.path.exists(a2dir): + os.makedirs(a2dir) + else: + os.system('rm ' + a2dir + '*.a2') + + if not os.path.exists(anndir): + os.makedirs(anndir) + else: + os.system('rm ' + anndir + '*.a2') + os.system('rm ' + anndir + '*.a1') + + # write event and triggers, (and entity: if predict both entity and trigger) + for fid, preds in pred_output.items(): + ev_en_preds_ = preds[0] + events = preds[1] + + enid_mapping, en_preds_out_, a2_ents_ = mapping_entity_id(pred_ents[fid], g_entity_ids_[fid], rev_type_map, params) + + # entity and trigger for ann file + ann_en_lines = [] + ann_tr_lines = [] + # write entity and trigger from entity predictions + for pr_id, e_pred in pred_ents[fid].items(): + e0_id = e_pred[0] + e_id = enid_mapping[e0_id] + + output = ''.join( + [e_id, '\t', rev_type_map[e_pred[1]], ' ', str(e_pred[2][0]), ' ', str(e_pred[2][1]), '\t', + e_pred[3], '\n']) + + if e0_id.startswith('TR'): + ann_tr_lines.append(output) + + # only write entity to a1 + elif e0_id.startswith('T'): + + # entity in a2 + if e0_id in a2_ents_: + ann_tr_lines.append(output) + + else: + 
ann_en_lines.append(output) + + + # entity and trigger output for a2 + a2_en_lines_ = [] + a2_tr_lines_ = [] + + # write entity and trigger only included event predictions + # write entity and then trigger + for e_pred in ev_en_preds_: + e0_id = e_pred[0] + e_id = enid_mapping[e0_id] + + output = ''.join( + [e_id, '\t', rev_type_map[e_pred[1]], ' ', str(e_pred[2][0]), ' ', str(e_pred[2][1]), '\t', + e_pred[3], '\n']) + + if e0_id.startswith('TR'): + a2_tr_lines_.append(output) + elif e0_id.startswith('T'): + + # entity in a2 + if e0_id in a2_ents_: + a2_tr_lines_.append(output) + + else: + a2_en_lines_.append(output) + + # event output + ev_lines = [] + + # count event id + f_evid = 0 + + # mapping event id to incremental id + f_evid_map = collections.OrderedDict() + + # store modality + mod_list = [] + + for event_ in events: + + # create event id + evid = convert_evid_to_number(event_[0]) + + # lookup in the map or create a new id + if evid in f_evid_map: + evid_out = f_evid_map[evid] + else: + f_evid += 1 + evid_out = f_evid + f_evid_map[evid] = evid_out + + trid = event_[1][0] + trid = enid_mapping[trid] + typeEV = rev_type_map[event_[1][1]] + args_data = event_[2] + mod_pred = event_[3] + + args_output = '' + for arg_ in args_data: + + # relation type + typeR = arg_[0] + + # check event or entity argument + if len(arg_) > 2: + argIdE = arg_[1] + nest_evid = convert_evid_to_number(argIdE) + if nest_evid in f_evid_map: + nest_evid_out = f_evid_map[nest_evid] + eid = 'E' + str(nest_evid_out) + else: + print('ERROR: NESTED EVENT BUT MISSING EVENT ARGUMENT.') + + # entity argument + else: + a2data = arg_[1] + eid = a2data[0] + + # mapping entity id: predict entity or entity in a2 + if params['ner_predict_all'] or eid in a2_ents_: + eid = enid_mapping[eid] + + if len(args_output) > 0: + args_output += ' ' + + args_output += typeR + ':' + eid + + # if has argument + if len(args_output) > 0: + output = ''.join(['E', str(evid_out), '\t', typeEV, ':', trid, ' ', args_output, '\n']) + ev_lines.append(output) + + # no argument + else: + output = ''.join(['E', str(evid_out), '\t', typeEV, ':', trid, '\n']) + ev_lines.append(output) + + # check and store modality + if mod_pred > 1: + mod_value = params['mappings']['rev_modality_map'][mod_pred] + mod_list.append([mod_value, evid_out]) + + # write modality + if len(mod_list) > 0: + for mod_id, mod_data in enumerate(mod_list): + mod_type = mod_data[0] + evid_out = mod_data[1] + output = ''.join(['M', str(mod_id + 1), '\t', mod_type, ' ', 'E', str(evid_out), '\n']) + ev_lines.append(output) + + # write a2 files + with open(a2dir + fid + '.a2', 'w') as o2file: + + # write entity + if params['ner_predict_all']: + for entity in a2_en_lines_: + o2file.write(entity) + + for trigger in a2_tr_lines_: + o2file.write(trigger) + for event in ev_lines: + o2file.write(event) + + # write ann file + with open(anndir + fid + '.a1', 'w') as o1file: + for entity in ann_en_lines: + o1file.write(entity) + + with open(anndir + fid + '.a2', 'w') as annfile: + for entity in ann_en_lines: + annfile.write(entity) + for trigger in ann_tr_lines: + annfile.write(trigger) + + # events are the same for both a2 and ann + for event in ev_lines: + annfile.write(event) + + return \ No newline at end of file diff --git a/eval/evalRE.py b/eval/evalRE.py index ba3c077..cc2b3a3 100644 --- a/eval/evalRE.py +++ b/eval/evalRE.py @@ -508,3 +508,176 @@ def extract_fscore(path): 'so_p': float(soft_p.strip()) * 100} return report + +def write_entity_relations(result_dir, fidss, ent_anns, 
rel_anns, params):
+    # def gen_annotation(fidss, ent_anns, rel_anns, params, result_dir):
+    """Generate entity and relation prediction"""
+
+    dir2wr = ''.join([result_dir, 'rel-last/rel-ann/'])
+    if not os.path.exists(dir2wr):
+        os.makedirs(dir2wr)
+    else:
+        os.system('rm ' + dir2wr + '*.ann')
+
+    # Initial ent+rel map
+    map = defaultdict()
+
+    for fids in fidss:
+        for fid in fids:
+            map[fid] = {'ents': {}, 'rels': {}}
+
+    for xi, (fids, ent_ann, rel_ann) in enumerate(zip(fidss, ent_anns, rel_anns)):
+        # Mapping entities
+        entity_map = defaultdict()
+        for xb, (fid) in enumerate(fids):
+            span_indices = ent_ann['span_indices'][xb]
+            ner_terms = ent_ann['ner_terms'][xb]
+            ner_preds = ent_ann['ner_preds'][xb]
+            words = ent_ann['words'][xb]
+            offsets = ent_ann['offsets'][xb]
+            sub_to_words = ent_ann['sub_to_words'][xb]
+
+            entities = map[fid]['ents']
+
+            for x, pair in enumerate(span_indices):
+                if pair[0].item() == -1:
+                    break
+                if ner_preds[x] > 0:
+                    try:
+                        e_id = ner_terms.id2term[x]
+                        e_type = params['mappings']['rev_type_map'][
+                            params['mappings']['nn_mapping']['tag2type_map'][ner_preds[x]]]
+                        if 'pipeline_entity_org_map' in params:
+                            if e_id in params['pipeline_entity_org_map'][fid]:
+                                e_words, e_offset = params['pipeline_entity_org_map'][fid][e_id]
+                            else:
+                                print(e_id)
+                                e_words, e_offset = get_entity_attrs(pair, words, offsets, sub_to_words)
+                        else:
+                            e_words, e_offset = get_entity_attrs(pair, words, offsets, sub_to_words)
+
+                        # save entity map
+                        entity_map[(xb, x)] = (
+                            ner_preds[x], e_id, e_type, e_words, e_offset)
+
+                        # save entity dic info
+                        entities[e_id] = {"id": e_id, "type": e_type, "start": e_offset[0], "end": e_offset[1],
+                                          "ref": e_words}
+                    except KeyError as error:
+                        print('pred not map term', error)
+        if len(rel_ann) > 0:
+            # Mapping relations
+            pairs_idx = rel_ann['pairs_idx']
+            rel_preds = rel_ann['rel_preds']
+
+            pairs_idx_i = pairs_idx[0]
+            pairs_idx_j = pairs_idx[1]
+            pairs_idx_k = pairs_idx[2]
+
+            for x, i in enumerate(pairs_idx_i):
+                relations = map[fids[i]]['rels']
+                r_count = len(relations) + 1
+
+                j = pairs_idx_j[x]
+                k = pairs_idx_k[x]
+                rel = rel_preds[x].item()
+                role = params['mappings']['rev_rel_map'][rel].split(":")[1]
+                if role != 'Other':
+
+                    try:
+                        arg1s = entity_map[(i.item(), j.item())]
+                        arg2s = entity_map[(i.item(), k.item())]
+
+                        if int(params['mappings']['rev_rel_map'][rel].split(":")[0]) > int(
+                                params['mappings']['rev_rel_map'][rel].split(":")[-1]):
+                            arg1 = arg2s[1]
+                            arg2 = arg1s[1]
+                        else:
+                            arg1 = arg1s[1]
+                            arg2 = arg2s[1]
+                        r_id = 'R' + str(r_count)
+                        r_count += 1
+                        relations[r_id] = {"id": r_id, "role": role,
+                                           "left_arg": {"label": "Arg1", "id": arg1},
+                                           "right_arg": {"label": "Arg2", "id": arg2}}
+                    except KeyError as error:
+                        print('error relation', fids[i], error)
+
+    for fid, ners_rels in map.items():
+        write_annotation_file_bio(dir2wr, fid, entities=ners_rels['ents'],
+                                  relations=ners_rels['rels'])
+
+
+def mapping_entity_id(entities_):
+    eid = 1
+    enid_mapping = collections.OrderedDict()
+    en_preds_out_ = []
+
+    # create mapping for entity id first
+    for en_id, en_data in entities_.items():
+
+        if en_id.startswith('TR'):
+            continue
+
+        elif en_id.startswith('T'):
+            enid_mapping[en_id] = 'T' + str(eid)
+            eid += 1
+            en_preds_out_.append(en_data)
+
+    # create mapping for trigger id
+    for en_id, en_data in entities_.items():
+
+        if en_id.startswith('TR'):
+            enid_mapping[en_id] = 'T' + str(eid)
+            eid += 1
+            en_preds_out_.append(en_data)
+
+    return enid_mapping, en_preds_out_
+
+
+def write_annotation_file_bio(dir2wr, fid, entities=None, 
relations=None): + re_lines = [] + en_lines = [] + tr_lines = [] + + # entity id mapping + enid_mapping, en_preds_out_ = mapping_entity_id(entities) + + if entities: + for entity in en_preds_out_: + entity_annotation = "{}\t{} {} {}\t{}".format( + enid_mapping[entity["id"]], + entity["type"], + entity["start"], + entity["end"], + entity["ref"], + ) + + re_lines.append(entity_annotation) + + if entity["id"].startswith('TR'): + tr_lines.append(entity_annotation) + + elif entity["id"].startswith('T'): + en_lines.append(entity_annotation) + + if relations: + for relation in relations.values(): + relation_annotation = "{}\t{} {}:{} {}:{}".format( + relation["id"], + relation["role"], + relation["left_arg"]["label"], + enid_mapping[relation["left_arg"]["id"]], + relation["right_arg"]["label"], + enid_mapping[relation["right_arg"]["id"]], + ) + re_lines.append(relation_annotation) + + # write to file + re_file = ''.join([dir2wr, fid, '-RE.ann']) + en_file = ''.join([dir2wr, fid, '-EN.ann']) + tr_file = ''.join([dir2wr, fid, '-TR.ann']) + + write_lines(re_lines, re_file) + write_lines(en_lines, en_file) + write_lines(tr_lines, tr_file) \ No newline at end of file diff --git a/eval/evaluate.py b/eval/evaluate.py index e80ce6e..fda46e5 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -3,8 +3,8 @@ import torch from tqdm import tqdm -from eval.evalEV import evaluate_ev -from eval.evalRE import estimate_perf, estimate_rel, gen_annotation +from eval.evalEV import evaluate_ev, write_ev_2file_bio +from eval.evalRE import estimate_perf, estimate_rel, write_entity_relations from eval.evalNER import eval_nner from scripts.pipeline_process import gen_ner_ann_files, gen_rel_ann_files from utils import utils @@ -649,36 +649,23 @@ def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, pa if params['gpu'] >= 0: torch.cuda.empty_cache() # write entity and relation prediction - gen_annotation(fidss, ent_anns, rel_anns, params, result_dir) - # _ = write_entity_relations( - # result_dir=result_dir, - # fidss=fidss, - # ent_anns=ent_anns, - # rel_anns=rel_anns, - # params=params - # ) + _ = write_entity_relations( + result_dir=result_dir, + fidss=fidss, + ent_anns=ent_anns, + rel_anns=rel_anns, + params=params + ) if is_eval_ev > 0: - # write_events(fids=fidss, - # all_ent_preds=ent_preds, - # all_words=wordss, - # all_offsets=offsetss, - # all_span_terms=all_ner_terms, - # all_span_indices=span_indicess, - # all_sub_to_words=sub_to_wordss, - # all_ev_preds=ev_preds, - # g_entity_ids_=g_entity_ids_, - # params=params, - # result_dir=result_dir) - - _ = evaluate_ev(fids=fidss, - all_ent_preds=ent_preds, - all_words=wordss, - all_offsets=offsetss, - all_span_terms=all_ner_terms, - all_span_indices=span_indicess, - all_sub_to_words=sub_to_wordss, - all_ev_preds=ev_preds, - params=params, - gold_dir=eval_data, - result_dir=result_dir) + write_ev_2file_bio(fids=fidss, + all_ent_preds=ent_preds, + all_words=wordss, + all_offsets=offsetss, + all_span_terms=all_ner_terms, + all_span_indices=span_indicess, + all_sub_to_words=sub_to_wordss, + all_ev_preds=ev_preds, + g_entity_ids_=g_entity_ids_, + params=params, + result_dir=result_dir) From 3c2eca08d41cf3348f156516ae65ba22d8b77bfb Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 02:09:30 +0900 Subject: [PATCH 60/70] fix bug --- eval/evalRE.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eval/evalRE.py b/eval/evalRE.py index cc2b3a3..85b1db5 100644 --- a/eval/evalRE.py +++ b/eval/evalRE.py @@ -1,4 +1,5 @@ import os +import 
collections from collections import defaultdict import numpy as np From 653de588fc64bab27d2c3d25975292d34241bc57 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 02:17:04 +0900 Subject: [PATCH 61/70] write output --- eval/evalEV.py | 27 +++++++++++++++++++++++++++ eval/evaluate.py | 4 ++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/eval/evalEV.py b/eval/evalEV.py index 7cea518..bd31e05 100644 --- a/eval/evalEV.py +++ b/eval/evalEV.py @@ -502,6 +502,33 @@ def extract_fscore(path): 'tot_scores': (float(tot_precision.strip()), float(tot_recall.strip()), float(tot_fscore.strip()))} # write events to file + +# generate event output and evaluation +def write_events_bio(fids, all_ent_preds, all_words, all_offsets, all_span_terms, all_span_indices, all_sub_to_words, + all_ev_preds, g_entity_ids_, params, result_dir): + # generate predicted entities + pred_ents = generate_entities(fids=fids, + all_e_preds=all_ent_preds, + all_words=all_words, + all_offsets=all_offsets, + all_span_terms=all_span_terms, + all_span_indices=all_span_indices, + all_sub_to_words=all_sub_to_words, + params=params) + + # generate predicted events + pred_evs = generate_events(fids=fids, + all_ev_preds=all_ev_preds, + params=params) + + # generate event output + preds_output = generate_ev_output(pred_ents, pred_evs, params) + + # write output to file + write_ev_2file_bio(preds_output, pred_ents, result_dir, g_entity_ids_, params) + + return + def write_ev_2file_bio(pred_output, pred_ents, result_dir, g_entity_ids_, params): a2dir = result_dir + 'ev-last/ev-tok-a2/' anndir = result_dir + 'ev-last/ev-tok-ann/' diff --git a/eval/evaluate.py b/eval/evaluate.py index fda46e5..5c4022c 100644 --- a/eval/evaluate.py +++ b/eval/evaluate.py @@ -3,7 +3,7 @@ import torch from tqdm import tqdm -from eval.evalEV import evaluate_ev, write_ev_2file_bio +from eval.evalEV import evaluate_ev, write_events_bio from eval.evalRE import estimate_perf, estimate_rel, write_entity_relations from eval.evalNER import eval_nner from scripts.pipeline_process import gen_ner_ann_files, gen_rel_ann_files @@ -658,7 +658,7 @@ def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, pa ) if is_eval_ev > 0: - write_ev_2file_bio(fids=fidss, + write_events_bio(fids=fidss, all_ent_preds=ent_preds, all_words=wordss, all_offsets=offsetss, From 6ba0689504f0fdbeb471b6769b0afb43b4e77350 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 02:40:13 +0900 Subject: [PATCH 62/70] fix bug --- model/EVNet.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/model/EVNet.py b/model/EVNet.py index 5ba06d2..21c6b4d 100644 --- a/model/EVNet.py +++ b/model/EVNet.py @@ -698,6 +698,7 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch): # nested loss nest_ev_loss = 0 + empty_pred = True # loop until stop nested event prediction or no more events predicted, or in limited nested levels while enable_nested_ev and len(current_positive_ids) > 0 and current_nested_level < self.params['max_ev_level']: @@ -726,6 +727,8 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch): current_tr_ids = ev_nest_cand_ids4nn['trids_'] current_truth_ids = ev_nest_cand_ids4nn['truth_ids_'] + empty_pred = False + # check non-empty if len(ev_nest_cand_ids4nn['trids_']) > 0: @@ -778,7 +781,7 @@ def calculate(self, ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch): if enable_modality: ev_loss = ev_loss + mod_losses * self.params['modality_weight'] - return 
+        return pred_ev_output, ev_loss, empty_pred
 
     def forward(self, ner_preds, rel_preds, n_epoch):
         """Forward.
@@ -826,7 +829,9 @@
         # 3-embeds, prediction, and loss
         # check empty
         if len(ev_ids4nn['ev_cand_ids4nn']['trids_']) > 0:
-            ev_out, ev_loss = self.calculate(ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch)
+            ev_out, ev_loss, empty_pred = self.calculate(ent_embeds, rel_embeds, rpred_types, ev_ids4nn, n_epoch)
+            if empty_pred:
+                ev_out = None
 
             return {'output': ev_out, 'loss': ev_loss}
         else:

From 1150ab28ff6b088f732daad026bc8506b35290eb Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 02:53:16 +0900
Subject: [PATCH 63/70] fix bug

---
 eval/evaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eval/evaluate.py b/eval/evaluate.py
index 5c4022c..47aa182 100644
--- a/eval/evaluate.py
+++ b/eval/evaluate.py
@@ -630,7 +630,7 @@ def predict_bio(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, pa
             ent_preds.append(ner_out["nner_preds"])
 
             # add predicted events
-            ev_preds.append(ev_out)
+            ev_preds.append(ev_out['output'])
 
             span_indicess.append(
                 [

From 46a0f93ec7633bc1b782b8b323a5231a6777651f Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 02:55:00 +0900
Subject: [PATCH 64/70] event prediction

---
 eval/evalEV.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/eval/evalEV.py b/eval/evalEV.py
index bd31e05..b1f77b8 100644
--- a/eval/evalEV.py
+++ b/eval/evalEV.py
@@ -529,6 +529,56 @@ def write_events_bio(fids, all_ent_preds, all_words, all_offsets, all_span_terms
 
     return
 
+def mapping_entity_id(en_preds_, g_entity_ids_, rev_type_map, params):
+    # if using gold entities, start trigger ids from the max entity id + 1
+    if not params['ner_predict_all'] and len(g_entity_ids_) > 0:
+        eid = g_entity_ids_[-1] + 1
+
+    # predict both entity and trigger
+    else:
+        eid = 1
+
+    # mapping
+    enid_mapping = collections.OrderedDict()
+    en_preds_out_ = []
+
+    # entity in a2
+    a2_ents_ = []
+
+    # create mapping for entity id first
+    for pr_id, en_pred in en_preds_.items():
+
+        # id
+        en_id = en_pred[0]
+
+        if en_id.startswith('TR'):
+            continue
+
+        elif en_id.startswith('T'):
+            enid_mapping[en_id] = 'T' + str(eid)
+            eid += 1
+            en_preds_out_.append(en_pred)
+
+            # using gold entity but in a2
+            if not params['ner_predict_all']:
+                etype = rev_type_map[en_pred[1]]
+
+                # check entity type in a2
+                if etype in params['a2_entities']:
+                    a2_ents_.append(en_id)
+
+    # create mapping for trigger id
+    for pr_id, en_pred in en_preds_.items():
+        # id
+        en_id = en_pred[0]
+
+        if en_id.startswith('TR'):
+            enid_mapping[en_id] = 'T' + str(eid)
+            eid += 1
+            en_preds_out_.append(en_pred)
+
+    return enid_mapping, en_preds_out_, a2_ents_
+
 def write_ev_2file_bio(pred_output, pred_ents, result_dir, g_entity_ids_, params):
     a2dir = result_dir + 'ev-last/ev-tok-a2/'
     anndir = result_dir + 'ev-last/ev-tok-ann/'
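The `mapping_entity_id` helper added in [PATCH 64/70] renumbers predicted mentions into a single contiguous brat `T` id space, entities first and triggers second, so trigger ids cannot collide with entity ids in the written `.a2` files. Below is a minimal sketch of that two-pass renumbering under simplified assumptions; the real `en_preds_` entries carry more fields than the `[id, type]` pairs used here:

```python
# Sketch of the two-pass renumbering: entities ("T*") get fresh contiguous
# ids first, then triggers ("TR*"), mirroring the two loops in the patch.
from collections import OrderedDict

en_preds_ = OrderedDict([
    ("p1", ["T2", "Protein"]),   # predicted entity with a gappy raw id
    ("p2", ["TR1", "Binding"]),  # predicted trigger
    ("p3", ["T5", "Protein"]),   # another predicted entity
])

enid_mapping, eid = OrderedDict(), 1
for want_trigger in (False, True):       # pass 1: entities, pass 2: triggers
    for en_id, _etype in en_preds_.values():
        if en_id.startswith("TR") == want_trigger:
            enid_mapping[en_id] = "T" + str(eid)
            eid += 1

print(enid_mapping)  # OrderedDict([('T2', 'T1'), ('T5', 'T2'), ('TR1', 'T3')])
```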
From f0c2d082015e4df0ba33e56ff12e1d22c707b139 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 02:59:03 +0900
Subject: [PATCH 65/70] data path

---
 run.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/run.sh b/run.sh
index d6e57a8..f6d4e1a 100644
--- a/run.sh
+++ b/run.sh
@@ -38,8 +38,8 @@ elif [ "$TASK" = "offset" ]; then
 
     # paths
     REFDIR="data/corpora/$CORPUS_NAME/$DEV_TEST/"   # reference gold data
-    PREDDIR="$TASK_DIR/predict-$GOLD_E2E-$DEV_TEST/ev-last/ev-tok-a2/"
-    OUTDIR="$TASK_DIR/predict-$GOLD_E2E-$DEV_TEST/ev-last/"    # retrieve the original offsets
+    PREDDIR="$TASK_DIR/deepem-bionlp/predict-$GOLD_E2E-$DEV_TEST/ev-last/ev-tok-a2/"
+    OUTDIR="$TASK_DIR/deepem-bionlp/predict-$GOLD_E2E-$DEV_TEST/ev-last/"    # retrieve the original offsets
 
     # retrieve the original offsets and create zip format for online evaluation
     python scripts/postprocess.py $REFDIR $PREDDIR $OUTDIR $CORPUS_NAME $DEV_TEST
@@ -55,7 +55,7 @@ elif [ "$TASK" = "eval" ]; then
 
     # paths
     REFDIR="data/original_corpora/$CORPUS_NAME/$DEV_TEST/"  # reference gold data
-    PREDDIR="$TASK_DIR/predict-$GOLD_E2E-$DEV_TEST/ev-last/ev-orig-a2/"
+    PREDDIR="$TASK_DIR/deepem-bionlp/predict-$GOLD_E2E-$DEV_TEST/ev-last/ev-orig-a2/"

From 94abab331ba7245df88f19a05e746db66d6d66b8 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 03:10:25 +0900
Subject: [PATCH 66/70] install pubmed requirements

---
 setup/requirements.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/setup/requirements.txt b/setup/requirements.txt
index 9fc92d4..ddeef4b 100644
--- a/setup/requirements.txt
+++ b/setup/requirements.txt
@@ -12,4 +12,8 @@ pytorch-nlp
 tqdm
 requests
 torch==1.1.0
-torchvision==0.3.0
\ No newline at end of file
+torchvision==0.3.0
+git+https://github.com/titipata/pubmed_parser.git
+ebooklib
+beautifulsoup4
+lxml
\ No newline at end of file

From c468db2127a4b244a9a8116ec02c2775547b891d Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 03:14:34 +0900
Subject: [PATCH 67/70] pubmed configs

---
 scripts/generate_configs_bio.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scripts/generate_configs_bio.py b/scripts/generate_configs_bio.py
index b0291e4..8730797 100644
--- a/scripts/generate_configs_bio.py
+++ b/scripts/generate_configs_bio.py
@@ -49,7 +49,7 @@ def gen_predict_config(predict_config, specific_config, eval_set, config_dir, mo
 
 def gen_predict_config_pubmed(predict_config, specific_config, config_dir, expdir, dataname):
     predict_config['test_data'] = ''.join(["data/", dataname, "/processed-text/", "text/"])
-    predict_config['result_dir'] = ''.join([expdir, dataname, '/deepem-bionlp', '/results/'])
+    predict_config['result_dir'] = ''.join([expdir, dataname, '/results/'])
 
     predict_config['raw_text'] = True
     predict_config['ner_predict_all'] = True
@@ -128,8 +128,7 @@ def generate_configs_pubmed(expdir, dataname, model_name, gpu):
     """Generate configs for all."""
 
     # create experiment dir
-    config_dir = os.path.join(os.path.join(expdir, dataname), 'deepem-bionlp')
-    config_dir = os.path.join(config_dir, 'configs')
+    config_dir = os.path.join(os.path.join(expdir, dataname), 'configs')
     utils.makedir(config_dir)
 
     # default setting

From 2a4c359f47bf92e681e9b179f705f71d48dd30c9 Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 03:18:13 +0900
Subject: [PATCH 68/70] predict on pubmed for raw-text

---
 loader/prepData/brat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loader/prepData/brat.py b/loader/prepData/brat.py
index b2ba019..eace03a 100644
--- a/loader/prepData/brat.py
+++ b/loader/prepData/brat.py
@@ -207,7 +207,7 @@ def brat_loader(files_fold, params):
         fevents['ids'] = idsE
 
         # check empty
-        if len(idsT) == len(idsTR) == 0:
+        if len(idsT) == 0 and not params['raw_text']:
             continue
 
         else:

From bdd0c9b3653fe5465260357925baf67aa4392e7d Mon Sep 17 00:00:00 2001
From: trieuhl
Date: Tue, 22 Mar 2022 03:19:10 +0900
Subject: [PATCH 69/70] raw text config

---
 configs/default.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/configs/default.yaml b/configs/default.yaml
index 8e02934..bdada65 100644 --- a/configs/default.yaml +++ b/configs/default.yaml @@ -120,4 +120,5 @@ gold_eval: False # display options stats: True # print relations -show_macro: False # print result for relations \ No newline at end of file +show_macro: False # print result for relations +raw_text: False \ No newline at end of file From be6f6136cc461d23f43a2bd3ef5779bb23a85116 Mon Sep 17 00:00:00 2001 From: trieuhl Date: Tue, 22 Mar 2022 03:21:53 +0900 Subject: [PATCH 70/70] brat path --- pubmed.sh | 2 +- run.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pubmed.sh b/pubmed.sh index 76e3549..dc7c5a3 100644 --- a/pubmed.sh +++ b/pubmed.sh @@ -175,7 +175,7 @@ elif [ "$TASK" = "e2e" ]; then echo "6. Prepare data for brat" PRED_DIR="experiments/$MY_DATA/results/ev-last/$MY_DATA-brat/" - BRAT_DIR="brat/brat-v1.3_Crunchy_Frog/data/" + BRAT_DIR="brat/data/" # annotation file CONFIG="configs/brat/$MODEL_NAME" diff --git a/run.sh b/run.sh index f6d4e1a..3af290d 100644 --- a/run.sh +++ b/run.sh @@ -70,7 +70,7 @@ elif [ "$TASK" = "brat" ]; then DEV_TEST=$4 # predict for dev, test sets PRED_DIR="$TASK_DIR/predict-$GOLD_E2E-$DEV_TEST/ev-last/$CORPUS_NAME-brat/" - BRAT_DIR="brat/brat-v1.3_Crunchy_Frog/data/" + BRAT_DIR="brat/data/" # annotation file CONFIG="configs/brat/$CORPUS_NAME"
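With [PATCH 70/70], both `pubmed.sh` and `run.sh` point brat at `brat/data/`, which matches the clone location created by `setup/install-brat.sh` rather than the older `brat-v1.3_Crunchy_Frog` release directory. As a quick sanity check before opening the predictions in brat, a sketch like the following can flag unpaired files; it assumes the brat clone sits at the repository root, and relies only on brat's standard convention of one `.txt` source per `.ann` annotation file:

```python
# Sketch: brat expects one .txt text file per .ann annotation file; list any
# predicted documents whose text file was not copied into brat/data/.
import os

brat_dir = "brat/data"

def stems(ext):
    # document ids for all files in brat_dir with the given extension
    return {f[: -len(ext)] for f in os.listdir(brat_dir) if f.endswith(ext)}

missing = sorted(stems(".ann") - stems(".txt"))
print(len(stems(".ann")), "annotated docs; missing .txt for:", missing[:5])
```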