From 5b1cd72ecf0f49903d3af7025a9325bf3a0f3286 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Wed, 25 Mar 2020 15:53:14 +0900
Subject: [PATCH 01/24] added encoding utf-8 to io

---
 chatette/adapters/_base.py                  | 2 +-
 chatette/adapters/rasa.py                   | 2 +-
 chatette/parsing/line_count_file_wrapper.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/chatette/adapters/_base.py b/chatette/adapters/_base.py
index 71c7900d..8ae57785 100644
--- a/chatette/adapters/_base.py
+++ b/chatette/adapters/_base.py
@@ -54,7 +54,7 @@ def write(self, output_directory, examples, synonyms):
                 self.__get_file_name(
                     batch, output_directory, single_file_output
                 )
-            with io.open(output_file_path, 'w') as output_file:
+            with io.open(output_file_path, 'w', encoding='utf-8') as output_file:
                 self._write_batch(output_file, batch)
 
     @classmethod
diff --git a/chatette/adapters/rasa.py b/chatette/adapters/rasa.py
index c5b63570..f6ce655b 100644
--- a/chatette/adapters/rasa.py
+++ b/chatette/adapters/rasa.py
@@ -68,7 +68,7 @@ def _get_base_to_extend(self):
         if self._base_file_contents is None:
             if self._base_filepath is None:
                 return self._get_empty_base()
-            with io.open(self._base_filepath, 'r') as base_file:
+            with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file:
                 self._base_file_contents = json.load(base_file)
             self.check_base_file_contents()
         return self._base_file_contents
diff --git a/chatette/parsing/line_count_file_wrapper.py b/chatette/parsing/line_count_file_wrapper.py
index ace354f1..60fd29f1 100644
--- a/chatette/parsing/line_count_file_wrapper.py
+++ b/chatette/parsing/line_count_file_wrapper.py
@@ -15,7 +15,7 @@ class LineCountFileWrapper(object):
     
     def __init__(self, filepath, mode='r'):
         self.name = cast_to_unicode(filepath)
-        self.f = io.open(filepath, mode)
+        self.f = io.open(filepath, mode, encoding='utf-8')
         self.line_nb = 0
 
     def close(self):

From eef5212d698781ba86055b0857b75ceda8448d1c Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 17 Aug 2020 15:40:33 +0900
Subject: [PATCH 02/24] added rasa YAML adapter

---
 chatette/__main__.py          |   2 +-
 chatette/adapters/factory.py  |   3 +
 chatette/adapters/rasa_yml.py | 104 ++++++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 chatette/adapters/rasa_yml.py

diff --git a/chatette/__main__.py b/chatette/__main__.py
index 08be094f..28a9f121 100644
--- a/chatette/__main__.py
+++ b/chatette/__main__.py
@@ -75,7 +75,7 @@ def _add_optional_arguments(argument_parser):
     argument_parser.add_argument(
         "-a", "--adapter", dest="adapter", required=False,
         type=str, default="rasa",
-        help="Write adapter. Possible values: ['rasa', 'jsonl', 'rasamd']"
+        help="Write adapter. Possible values: ['rasa', 'jsonl', 'rasamd', 'rasayml']"
     )
     argument_parser.add_argument(
         "--base-file", dest="base_filepath",
diff --git a/chatette/adapters/factory.py b/chatette/adapters/factory.py
index d7426c7b..7b71246b 100644
--- a/chatette/adapters/factory.py
+++ b/chatette/adapters/factory.py
@@ -6,6 +6,7 @@
 from chatette.adapters.jsonl import JsonListAdapter
 from chatette.adapters.rasa import RasaAdapter
 from chatette.adapters.rasa_md import RasaMdAdapter
+from chatette.adapters.rasa_yml import RasaYMLAdapter
 
 
 def create_adapter(adapter_name, base_filepath=None):
@@ -23,6 +24,8 @@ def create_adapter(adapter_name, base_filepath=None):
         return RasaAdapter(base_filepath)
     elif adapter_name in ('rasa-md', 'rasamd'):
         return RasaMdAdapter(base_filepath)
+    elif adapter_name in ('rasa-yml', 'rasayml'):
+        return RasaYMLAdapter(base_filepath)
     elif adapter_name == 'jsonl':
         return JsonListAdapter(base_filepath)
     raise ValueError("Unknown adapter was selected.")
diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py
new file mode 100644
index 00000000..d781775c
--- /dev/null
+++ b/chatette/adapters/rasa_yml.py
@@ -0,0 +1,104 @@
+import os
+import io
+from collections import OrderedDict
+import ruamel.yaml as yaml
+
+from chatette.adapters._base import Adapter
+from chatette.utils import append_to_list_in_dict, cast_to_unicode
+
+def intent_dict_to_list_of_dict(data):
+    list_data = []
+    for key, values in data.items():
+        list_data.append(
+            {
+                "intent": key,
+                "examples": '\n'.join(['- ' + v for v in values]) + '\n'
+            }
+        )
+
+    return list_data
+
+class RasaYMLAdapter(Adapter):
+    def __init__(self, base_filepath=None):
+        super(RasaYMLAdapter, self).__init__(base_filepath, None)
+        self._base_file_contents = None
+
+    @classmethod
+    def _get_file_extension(cls):
+        return "yml"
+
+    def __get_file_name(self, batch, output_directory, single_file):
+        if single_file:
+            return \
+                os.path.join(
+                    output_directory, "nlu." + self._get_file_extension()
+                )
+        raise ValueError(
+            "Tried to generate several files with Rasa YAML adapter."
+        )
+
+    def _write_batch(self, output_file_handle, batch):
+        data = self._get_base_to_extend()
+        prepared_examples = dict()
+        for example in batch.examples:
+            append_to_list_in_dict(
+                prepared_examples,
+                example.intent_name, self.prepare_example(example)
+            )
+        prepared_examples = intent_dict_to_list_of_dict(prepared_examples)
+        prepared_examples.extend(
+            self.__format_synonyms(batch.synonyms)
+        )
+        data['nlu'] = prepared_examples
+        data = cast_to_unicode(data)
+
+        yaml.scalarstring.walk_tree(data)
+        yaml.round_trip_dump(data, output_file_handle, default_flow_style=False, allow_unicode=True)
+
+
+    def prepare_example(self, example):
+        if len(example.entities) == 0:
+            return example.text
+
+        sorted_entities = \
+            sorted(
+                example.entities,
+                reverse=True,
+                key=lambda entity: entity._start_index
+            )
+        result = example.text[:]
+        for entity in sorted_entities:
+            result = \
+                result[:entity._start_index] + "[" + \
+                result[entity._start_index:entity._start_index + entity._len] + \
+                ']{"entity": "' + entity.slot_name + '"}' + \
+                result[entity._start_index + entity._len:] # New rasa entity format
+        return result
+
+    @classmethod
+    def __format_synonyms(cls, synonyms):
+        # {str: [str]} -> [{"synonym": str, "examples": str}]; "examples" holds one "- item" line per synonym
+        return [
+            {
+                "synonym": slot_name,
+                "examples": '\n'.join(['- ' + s for s in synonyms[slot_name]]) + '\n'
+            }
+            for slot_name in synonyms
+            if len(synonyms[slot_name]) > 1
+        ]
+
+    def _get_base_to_extend(self):
+        ### TODO Implement later
+        return self._get_empty_base()        
+        # if self._base_file_contents is None:
+        #     if self._base_filepath is None:
+        #         return self._get_empty_base()
+        #     with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file:
+        #         self._base_file_contents = json.load(base_file)
+        #     self.check_base_file_contents()
+        # return self._base_file_contents
+
+    def _get_empty_base(self):
+        return {
+            "nlu": list()
+        }
\ No newline at end of file

From 02e72f55c8f4fb36a7287dc73e934c7e797046b5 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 8 Mar 2021 16:47:07 +0900
Subject: [PATCH 03/24] add annotation after slot in parser

---
 chatette/parsing/lexing/rule_unit_ref.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/chatette/parsing/lexing/rule_unit_ref.py b/chatette/parsing/lexing/rule_unit_ref.py
index 9a0a3c8f..f692dfad 100644
--- a/chatette/parsing/lexing/rule_unit_ref.py
+++ b/chatette/parsing/lexing/rule_unit_ref.py
@@ -11,6 +11,7 @@
     extract_identifier, \
     CASE_GEN_SYM, UNIT_END_SYM
 
+from chatette.parsing.lexing.rule_annotation import RuleAnnotation
 from chatette.parsing.lexing.rule_unit_start import RuleUnitStart
 from chatette.parsing.lexing.rule_variation import RuleVariation
 from chatette.parsing.lexing.rule_rand_gen import RuleRandGen
@@ -55,11 +56,13 @@ def _apply_strategy(self, **kwargs):
                 "using character '" + UNIT_END_SYM + "')."
             return False
 
+        is_slot = False
         # TODO maybe making a function for this would be useful
         if self._tokens[0].type == TerminalType.alias_ref_start:
             unit_end_type = TerminalType.alias_ref_end
         elif self._tokens[0].type == TerminalType.slot_ref_start:
             unit_end_type = TerminalType.slot_ref_end
+            is_slot = True
         elif self._tokens[0].type == TerminalType.intent_ref_start:
             unit_end_type = TerminalType.intent_ref_end
         else:  # Should never happen
@@ -72,5 +75,13 @@ def _apply_strategy(self, **kwargs):
         self._next_index += 1
         self._update_furthest_matched_index()
         self._tokens.append(LexicalToken(unit_end_type, UNIT_END_SYM))
-        
+
+        # This is for adding new rasa training mode that has role and group entity
+        # Reference: https://rasa.com/docs/rasa/nlu-training-data/#entities-roles-and-groups
+        annotation_rule = RuleAnnotation(self._text, self._next_index)
+        if is_slot and annotation_rule.matches():
+            self._next_index = annotation_rule.get_next_index_to_match()
+            self._update_furthest_matched_index()
+            self._tokens.extend(annotation_rule.get_lexical_tokens())
+
         return True

From 95bdfa4bd4260aa6cba8c66f7c57dfa720c45def Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Tue, 9 Mar 2021 15:24:38 +0900
Subject: [PATCH 04/24] add annotation check in parser

---
 chatette/parsing/parser.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/chatette/parsing/parser.py b/chatette/parsing/parser.py
index e2563bdf..ae2c9906 100644
--- a/chatette/parsing/parser.py
+++ b/chatette/parsing/parser.py
@@ -505,3 +505,26 @@ def _parse_choice(self, tokens):
         )
 
         return rules
+
+    def _check_for_annotations(self, tokens, i):
+
+        if (
+            i == len(tokens)
+            and tokens[i+1].type != TerminalType.annotation_start
+        ):
+            return None, i-1
+        
+        end_annotation_idx = 0
+        for j, token in enumerate(tokens[i:]):
+            if token.type == TerminalType.annotation_end:
+                end_annotation_idx = i+j
+
+        if end_annotation_idx > i:
+            annotation_tokens = tokens[i:end_annotation_idx+1]
+            annotation = self._annotation_tokens_to_dict(annotation_tokens)
+            return annotation, end_annotation_idx
+        else:
+            raise ValueError(  # Should never happen
+                    "Something wrong happens when parsing annotation" +\
+                    "for entity role or group."
+                )

From 70af8c8d287c5500bf1a005023c3ec905ad90a9a Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Wed, 10 Mar 2021 11:08:58 +0900
Subject: [PATCH 05/24] add rolegroup annotation check

---
 chatette/parsing/parser.py | 40 +++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/chatette/parsing/parser.py b/chatette/parsing/parser.py
index ae2c9906..cd654b1a 100644
--- a/chatette/parsing/parser.py
+++ b/chatette/parsing/parser.py
@@ -30,7 +30,7 @@
 from chatette.units.rule import Rule
 
 from chatette.parsing import \
-    ChoiceBuilder, UnitRefBuilder, \
+    ChoiceBuilder, UnitRefBuilder,\
     AliasDefBuilder, SlotDefBuilder, IntentDefBuilder
 
 
@@ -398,12 +398,17 @@ def _parse_rule(self, tokens):
             elif (
                 token.type in \
                 (TerminalType.alias_ref_end,
-                 TerminalType.slot_ref_end,
                  TerminalType.intent_ref_end)
             ):
                 rule_contents.append(current_builder.create_concrete())
                 current_builder = None
                 leading_space = False
+            elif token.type == TerminalType.slot_ref_end:
+                rolegroup_annotation, i = self._check_for_annotations(tokens, i)
+                current_builder.slot_rolegroup = rolegroup_annotation
+                rule_contents.append(current_builder.create_concrete())
+                current_builder = None
+                leading_space = False
             elif token.type == TerminalType.unit_identifier:
                 current_builder.identifier = token.text
             elif token.type == TerminalType.choice_start:
@@ -509,22 +514,32 @@
     def _check_for_annotations(self, tokens, i):
+        """
+        Looks for a role/group annotation right after the slot reference
+        that ends at `tokens[i]`.
+        Returns a 2-tuple: the annotation as a dict (or `None` if the next
+        token does not start an annotation) and the index of the last token
+        consumed (`i` itself when there is no annotation).
+        """
 
         if (
-            i == len(tokens)
-            and tokens[i+1].type != TerminalType.annotation_start
+            i + 1 >= len(tokens)
+            or tokens[i+1].type != TerminalType.annotation_start
         ):
-            return None, i-1
+            return None, i
         
-        end_annotation_idx = 0
-        for j, token in enumerate(tokens[i:]):
+        annotation = {}
+        current_key = None
+        for j, token in enumerate(tokens[i+1:]):
             if token.type == TerminalType.annotation_end:
-                end_annotation_idx = i+j
-
-        if end_annotation_idx > i:
-            annotation_tokens = tokens[i:end_annotation_idx+1]
-            annotation = self._annotation_tokens_to_dict(annotation_tokens)
-            return annotation, end_annotation_idx
-        else:
-            raise ValueError(  # Should never happen
-                    "Something wrong happens when parsing annotation" +\
-                    "for entity role or group."
-                )
+                i += j+1
+                break
+            elif token.type == TerminalType.key:
+                current_key = token.text
+            elif token.type == TerminalType.value:
+                if current_key in annotation:
+                    self.input_file_manager.syntax_error(
+                        "Annotation contained the key '" + current_key + \
+                        "' twice."
+                    )
+                annotation[current_key] = token.text
+        
+        return annotation, i

From 479cc3214e9625bbd0082db56120246a8a1ed913 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Wed, 10 Mar 2021 11:09:15 +0900
Subject: [PATCH 06/24] add comment

---
 chatette/parsing/lexing/rule_unit_ref.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/chatette/parsing/lexing/rule_unit_ref.py b/chatette/parsing/lexing/rule_unit_ref.py
index f692dfad..3a7f1e4d 100644
--- a/chatette/parsing/lexing/rule_unit_ref.py
+++ b/chatette/parsing/lexing/rule_unit_ref.py
@@ -79,6 +79,8 @@ def _apply_strategy(self, **kwargs):
         # This is for adding new rasa training mode that has role and group entity
         # Reference: https://rasa.com/docs/rasa/nlu-training-data/#entities-roles-and-groups
         annotation_rule = RuleAnnotation(self._text, self._next_index)
+
+        # ? Should we raise error if RuleAnnotation doesn't match, i.e. wrong pattern
         if is_slot and annotation_rule.matches():
             self._next_index = annotation_rule.get_next_index_to_match()
             self._update_furthest_matched_index()

From ceb9da341cf232ef7f0cfbe5380151a886311b7d Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Wed, 10 Mar 2021 11:09:42 +0900
Subject: [PATCH 07/24] added choice to choose unitref

---
 chatette/parsing/__init__.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/chatette/parsing/__init__.py b/chatette/parsing/__init__.py
index f570877c..753b1c46 100644
--- a/chatette/parsing/__init__.py
+++ b/chatette/parsing/__init__.py
@@ -16,7 +16,7 @@
 from future.utils import with_metaclass
 
 from chatette.units.modifiable.choice import Choice
-from chatette.units.modifiable.unit_reference import UnitReference
+from chatette.units.modifiable.unit_reference import UnitReference, SlotRoleGroupReference
 from chatette.units.modifiable.definitions.alias import AliasDefinition
 from chatette.units.modifiable.definitions.slot import SlotDefinition
 from chatette.units.modifiable.definitions.intent import IntentDefinition
@@ -91,6 +91,7 @@ def __init__(self):
         self.identifier = None
         self.variation = None
         self.arg_value = None
+        self.slot_rolegroup = None
 
     def _check_information(self):
         super(UnitRefBuilder, self)._check_information()
@@ -108,6 +109,12 @@ def _build_modifiers_repr(self):
 
     def create_concrete(self):
         self._check_information()
+        if self.slot_rolegroup is not None:
+            return SlotRoleGroupReference(
+                self.identifier, self.type,
+                self.leading_space, self._build_modifiers_repr(),
+                self.slot_rolegroup
+            )
         return UnitReference(
             self.identifier, self.type,
             self.leading_space, self._build_modifiers_repr()

From 3d34c2594843e81990bd91e81560523cfca32775 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Wed, 10 Mar 2021 11:10:01 +0900
Subject: [PATCH 08/24] added new slotref class for rolegroup

---
 chatette/units/modifiable/unit_reference.py | 51 +++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/chatette/units/modifiable/unit_reference.py b/chatette/units/modifiable/unit_reference.py
index 02b1c5e6..fcc83e65 100644
--- a/chatette/units/modifiable/unit_reference.py
+++ b/chatette/units/modifiable/unit_reference.py
@@ -126,3 +126,54 @@ def as_template_str(self):
         if self._leading_space:
             result = ' ' + result
         return result
+
+
+class SlotRoleGroupReference(UnitReference):
+    """
+    Represents a reference to a unit definition that can be contained
+    in a template rule.
+    """
+    def __init__(self, identifier, unit_type, leading_space, modifiers, rolegroup):
+        super(SlotRoleGroupReference, self).__init__(
+            identifier, unit_type, leading_space, modifiers
+        )
+        self._unit_type = 'slot'
+
+        # dictionary {"role": "value"}, or {"group": "value"}, or both
+        self._role = rolegroup.get('role', None)
+        self._group = rolegroup.get('group', None)
+
+    def _generate_random_strategy(self):
+        generated_example = super()._generate_random_strategy()
+
+        for ent in generated_example.entities:
+            if self._role is not None:
+                ent.role = self._role
+            if self._group is not None:
+                ent.group = self._group
+        return generated_example
+        
+    
+    def _generate_all_strategy(self):
+        generated_examples = super()._generate_all_strategy()
+
+        for ex in generated_examples:
+            for ent in ex.entities:
+                if self._role is not None:
+                    ent.role = self._role
+                if self._group is not None:
+                    ent.group = self._group
+
+        return generated_examples
+    
+    def _generate_n_strategy(self, n):
+        """Generates `n` examples via the parent class and tags their entities."""
+        # Forward `n`: the call previously dropped it, ignoring the requested count.
+        generated_examples = super()._generate_n_strategy(n)
+        for ex in generated_examples:
+            for ent in ex.entities:
+                if self._role is not None:
+                    ent.role = self._role
+                if self._group is not None:
+                    ent.group = self._group
+        return generated_examples
\ No newline at end of file

From f4531355b7f00fee8cabf14421b821e17595a930 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Wed, 10 Mar 2021 11:10:19 +0900
Subject: [PATCH 09/24] added role group to Entity unit

---
 chatette/units/__init__.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/chatette/units/__init__.py b/chatette/units/__init__.py
index bfbd3ff0..fb5070ae 100644
--- a/chatette/units/__init__.py
+++ b/chatette/units/__init__.py
@@ -125,11 +125,13 @@ class Entity(object):
     Represents an entity as it will be contained in examples
     (instances of `Example`).
     """
-    def __init__(self, name, length, value=None, start_index=0):
+    def __init__(self, name, length, value=None, start_index=0, role=None, group=None):
         self.slot_name = name  # name of the entity (not the associated text)
         self.value = value
         self._len = length
         self._start_index = start_index
+        self.role = role
+        self.group = group
 
     def _remove_leading_space(self):
         """
@@ -146,17 +148,27 @@ def _remove_leading_space(self):
         return True
     
     def as_dict(self):
-        return {
+        entity_dict = {
             "slot-name": self.slot_name,
             "value": self.value,
             "start-index": self._start_index,
             "end-index": self._start_index + self._len
         }
+        if self.role is not None:
+            entity_dict['role'] = self.role
+        if self.group is not None:
+            entity_dict['group'] = self.group
+        return entity_dict
     
     def __repr__(self):
         representation = "entity '" + self.slot_name + "'"
         if self.value is not None:
             representation += ":'" + self.value + "'"
+        # ? There might be better representation format?
+        if self.role is not None:
+            representation += ":'" + self.role + "'"
+        if self.group is not None:
+            representation += ":'" + self.group + "'"
         return representation
     def __str__(self):
         return \

From 7a34afef4707b7f7ee616538a28647f124aaa718 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Wed, 10 Mar 2021 13:20:50 +0900
Subject: [PATCH 10/24] fixed entity repr with rolegroup

---
 chatette/units/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chatette/units/__init__.py b/chatette/units/__init__.py
index fb5070ae..e22a46d3 100644
--- a/chatette/units/__init__.py
+++ b/chatette/units/__init__.py
@@ -166,9 +166,9 @@ def __repr__(self):
             representation += ":'" + self.value + "'"
         # ? There might be better representation format?
         if self.role is not None:
-            representation += ":'" + self.role + "'"
+            representation += ", 'role' :'" + self.role + "'"
         if self.group is not None:
-            representation += ":'" + self.group + "'"
+            representation += ", 'group' :'" + self.group + "'"
         return representation
     def __str__(self):
         return \

From 5e62dcf1388576069abd9093e6e17bc1232c889b Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 15 Mar 2021 19:03:04 +0900
Subject: [PATCH 11/24] added role,group,value format to adapter

---
 chatette/adapters/rasa.py     |  8 +++++++-
 chatette/adapters/rasa_md.py  | 15 +++++++++++----
 chatette/adapters/rasa_yml.py | 11 +++++++++--
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/chatette/adapters/rasa.py b/chatette/adapters/rasa.py
index f6ce655b..2ffe4547 100644
--- a/chatette/adapters/rasa.py
+++ b/chatette/adapters/rasa.py
@@ -38,12 +38,18 @@ def _write_batch(self, output_file_handle, batch):
 
     def prepare_example(self, example):
         def entity_to_rasa(entity):
-            return {
+            entity_dict = {
                 "entity": entity.slot_name,
                 "value": entity.value,
                 "start": entity._start_index,
                 "end": entity._start_index + entity._len,
             }
+            if entity.role is not None:
+                entity_dict['role'] = entity.role
+            if entity.group is not None:
+                entity_dict['group'] = entity.group
+
+            return entity_dict
 
         return {
             "intent": example.intent_name,
diff --git a/chatette/adapters/rasa_md.py b/chatette/adapters/rasa_md.py
index a92a3dc6..bd3702c3 100644
--- a/chatette/adapters/rasa_md.py
+++ b/chatette/adapters/rasa_md.py
@@ -76,11 +76,18 @@ def prepare_example(self, example):
             )
         result = example.text[:]
         for entity in sorted_entities:
+            entity_annotation_text = ']{"entity": "' + entity.slot_name + '"'  # every field closes its own quote
+            entity_text = result[entity._start_index:entity._start_index + entity._len]
+            if entity.value is not None and entity_text != entity.value:  # "value" only when it differs from the text
+                entity_annotation_text += f', "value": "{entity.value}"'
+            if entity.role is not None:
+                entity_annotation_text += f', "role": "{entity.role}"'
+            if entity.group is not None:
+                entity_annotation_text += f', "group": "{entity.group}"'
             result = \
                 result[:entity._start_index] + "[" + \
-                result[entity._start_index:entity._start_index + entity._len] + \
-                "](" + entity.slot_name + ")" + \
-                result[entity._start_index + entity._len:]
+                entity_text + entity_annotation_text + '}' + \
+                result[entity._start_index + entity._len:] # New rasa entity format
         return result
     
 
@@ -105,7 +112,7 @@ def _get_base_to_extend(self):
         if self._base_file_contents is None:
             if self._base_filepath is None:
                 return self._get_empty_base()
-            with io.open(self._base_filepath, 'r') as base_file:
+            with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file:
                 self._base_file_contents = ''.join(base_file.readlines())
             self.check_base_file_contents()
         return self._base_file_contents
diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py
index d781775c..4c88bc96 100644
--- a/chatette/adapters/rasa_yml.py
+++ b/chatette/adapters/rasa_yml.py
@@ -68,10 +68,17 @@ def prepare_example(self, example):
             )
         result = example.text[:]
         for entity in sorted_entities:
+            entity_annotation_text = ']{"entity": "' + entity.slot_name + '"'  # every field closes its own quote
+            entity_text = result[entity._start_index:entity._start_index + entity._len]
+            if entity.value is not None and entity_text != entity.value:  # "value" only when it differs from the text
+                entity_annotation_text += f', "value": "{entity.value}"'
+            if entity.role is not None:
+                entity_annotation_text += f', "role": "{entity.role}"'
+            if entity.group is not None:
+                entity_annotation_text += f', "group": "{entity.group}"'
             result = \
                 result[:entity._start_index] + "[" + \
-                result[entity._start_index:entity._start_index + entity._len] + \
-                ']{"entity": "' + entity.slot_name + '"}' + \
+                entity_text + entity_annotation_text + '}' + \
                 result[entity._start_index + entity._len:] # New rasa entity format
         return result
 

From 2d9ee0bb49ae706191af57f7e0864d7a11c6707d Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 15 Mar 2021 19:03:36 +0900
Subject: [PATCH 12/24] edited example to test role,group

---
 examples/simple/airport/aliases.chatette      | 4 ++--
 examples/simple/airport/slots/cities.chatette | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/simple/airport/aliases.chatette b/examples/simple/airport/aliases.chatette
index 4e5fa41e..12ccfd89 100644
--- a/examples/simple/airport/aliases.chatette
+++ b/examples/simple/airport/aliases.chatette
@@ -23,6 +23,6 @@
     register
     
 ~[from airport]
-    from @[source-airport]
+    from @[airport#source]("role":"source")
 ~[to airport]
-    [to go?] to @[source-airport]
+    [to go?] to @[airport#dest]('role':'destination')
diff --git a/examples/simple/airport/slots/cities.chatette b/examples/simple/airport/slots/cities.chatette
index 526d6877..a69bd5fb 100644
--- a/examples/simple/airport/slots/cities.chatette
+++ b/examples/simple/airport/slots/cities.chatette
@@ -1,10 +1,10 @@
 // Lists of cities that are available as source airports and destination airports
-@[source-airport]
+@[airport#source]
     Brussels
     Paris
     Amsterdam
     
-@[destination-airport]
+@[airport#dest]
     Paris
     Amsterdam
     London

From 43090952265627c23769e62cc04e783698ae2b61 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 15 Mar 2021 19:03:51 +0900
Subject: [PATCH 13/24] added ruamel.yml to reqs

---
 requirements/common.txt  | 1 +
 requirements/develop.txt | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements/common.txt b/requirements/common.txt
index ea4305f6..42892472 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -1,3 +1,4 @@
 enum-compat
 future
 six
+ruamel.yaml
diff --git a/requirements/develop.txt b/requirements/develop.txt
index cd362625..b1d16c2b 100644
--- a/requirements/develop.txt
+++ b/requirements/develop.txt
@@ -1,4 +1,4 @@
 -r test.txt
 tox
 pylint
-
+ruamel.yaml

From b52beef30069558cb6bfa432f24e4b4320f7792e Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Wed, 25 Mar 2020 15:53:14 +0900
Subject: [PATCH 14/24] added encoding utf-8 to io

---
 chatette/adapters/_base.py                  | 2 +-
 chatette/adapters/rasa.py                   | 2 +-
 chatette/parsing/line_count_file_wrapper.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/chatette/adapters/_base.py b/chatette/adapters/_base.py
index 2bd056f5..1e8c4139 100644
--- a/chatette/adapters/_base.py
+++ b/chatette/adapters/_base.py
@@ -53,7 +53,7 @@ def write(self, output_directory, examples, synonyms):
                 self.__get_file_name(
                     batch, output_directory, single_file_output
                 )
-            with io.open(output_file_path, 'w') as output_file:
+            with io.open(output_file_path, 'w', encoding='utf-8') as output_file:
                 self._write_batch(output_file, batch)
 
     @classmethod
diff --git a/chatette/adapters/rasa.py b/chatette/adapters/rasa.py
index c5b63570..f6ce655b 100644
--- a/chatette/adapters/rasa.py
+++ b/chatette/adapters/rasa.py
@@ -68,7 +68,7 @@ def _get_base_to_extend(self):
         if self._base_file_contents is None:
             if self._base_filepath is None:
                 return self._get_empty_base()
-            with io.open(self._base_filepath, 'r') as base_file:
+            with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file:
                 self._base_file_contents = json.load(base_file)
             self.check_base_file_contents()
         return self._base_file_contents
diff --git a/chatette/parsing/line_count_file_wrapper.py b/chatette/parsing/line_count_file_wrapper.py
index ace354f1..60fd29f1 100644
--- a/chatette/parsing/line_count_file_wrapper.py
+++ b/chatette/parsing/line_count_file_wrapper.py
@@ -15,7 +15,7 @@ class LineCountFileWrapper(object):
     
     def __init__(self, filepath, mode='r'):
         self.name = cast_to_unicode(filepath)
-        self.f = io.open(filepath, mode)
+        self.f = io.open(filepath, mode, encoding='utf-8')
         self.line_nb = 0
 
     def close(self):

From aba7c75b6e0239fdc1b800983e55b6a54d313b06 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 17 Aug 2020 15:40:33 +0900
Subject: [PATCH 15/24] added rasa YAML adapter

---
 chatette/__main__.py          |   3 +-
 chatette/adapters/factory.py  |   3 +
 chatette/adapters/rasa_yml.py | 104 ++++++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 chatette/adapters/rasa_yml.py

diff --git a/chatette/__main__.py b/chatette/__main__.py
index 3379ba94..0d558d43 100644
--- a/chatette/__main__.py
+++ b/chatette/__main__.py
@@ -75,8 +75,7 @@ def _add_optional_arguments(argument_parser):
     argument_parser.add_argument(
         "-a", "--adapter", dest="adapter", required=False,
         type=str, default="rasa",
-        help="Write adapter. " + \
-            "Possible values: ['rasa', 'rasamd' or 'rasa-md', 'jsonl']"
+        help="Write adapter. Possible values: ['rasa', 'jsonl', 'rasamd' or 'rasa-md', 'rasayml']"
     )
     argument_parser.add_argument(
         "--base-file", dest="base_filepath",
diff --git a/chatette/adapters/factory.py b/chatette/adapters/factory.py
index d7426c7b..7b71246b 100644
--- a/chatette/adapters/factory.py
+++ b/chatette/adapters/factory.py
@@ -6,6 +6,7 @@
 from chatette.adapters.jsonl import JsonListAdapter
 from chatette.adapters.rasa import RasaAdapter
 from chatette.adapters.rasa_md import RasaMdAdapter
+from chatette.adapters.rasa_yml import RasaYMLAdapter
 
 
 def create_adapter(adapter_name, base_filepath=None):
@@ -23,6 +24,8 @@ def create_adapter(adapter_name, base_filepath=None):
         return RasaAdapter(base_filepath)
     elif adapter_name in ('rasa-md', 'rasamd'):
         return RasaMdAdapter(base_filepath)
+    elif adapter_name in ('rasa-yml', 'rasayml'):
+        return RasaYMLAdapter(base_filepath)
     elif adapter_name == 'jsonl':
         return JsonListAdapter(base_filepath)
     raise ValueError("Unknown adapter was selected.")
diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py
new file mode 100644
index 00000000..d781775c
--- /dev/null
+++ b/chatette/adapters/rasa_yml.py
@@ -0,0 +1,104 @@
+import os
+import io
+from collections import OrderedDict
+import ruamel.yaml as yaml
+
+from chatette.adapters._base import Adapter
+from chatette.utils import append_to_list_in_dict, cast_to_unicode
+
+def intent_dict_to_list_of_dict(data):
+    list_data = []
+    for key, values in data.items():
+        list_data.append(
+            {
+                "intent": key,
+                "examples": '\n'.join(['- ' + v for v in values]) + '\n'
+            }
+        )
+
+    return list_data
+
+class RasaYMLAdapter(Adapter):
+    def __init__(self, base_filepath=None):
+        super(RasaYMLAdapter, self).__init__(base_filepath, None)
+        self._base_file_contents = None
+
+    @classmethod
+    def _get_file_extension(cls):
+        return "yml"
+
+    def __get_file_name(self, batch, output_directory, single_file):
+        if single_file:
+            return \
+                os.path.join(
+                    output_directory, "nlu." + self._get_file_extension()
+                )
+        raise ValueError(
+            "Tried to generate several files with Rasa YAML adapter."
+        )
+
+    def _write_batch(self, output_file_handle, batch):
+        data = self._get_base_to_extend()
+        prepared_examples = dict()
+        for example in batch.examples:
+            append_to_list_in_dict(
+                prepared_examples,
+                example.intent_name, self.prepare_example(example)
+            )
+        prepared_examples = intent_dict_to_list_of_dict(prepared_examples)
+        prepared_examples.extend(
+            self.__format_synonyms(batch.synonyms)
+        )
+        data['nlu'] = prepared_examples
+        data = cast_to_unicode(data)
+
+        yaml.scalarstring.walk_tree(data)
+        yaml.round_trip_dump(data, output_file_handle, default_flow_style=False, allow_unicode=True)
+
+
+    def prepare_example(self, example):
+        if len(example.entities) == 0:
+            return example.text
+
+        sorted_entities = \
+            sorted(
+                example.entities,
+                reverse=True,
+                key=lambda entity: entity._start_index
+            )
+        result = example.text[:]
+        for entity in sorted_entities:
+            result = \
+                result[:entity._start_index] + "[" + \
+                result[entity._start_index:entity._start_index + entity._len] + \
+                ']{"entity": "' + entity.slot_name + '"}' + \
+                result[entity._start_index + entity._len:] # New rasa entity format
+        return result
+
+    @classmethod
+    def __format_synonyms(cls, synonyms):
+        # {str: [str]} -> [{"synonym": str, "examples": str}]
+        return [
+            {
+                "synonym": slot_name,
+                "examples": '\n'.join(['- ' + s for s in synonyms[slot_name]]) + '\n'
+            }
+            for slot_name in synonyms
+            if len(synonyms[slot_name]) > 1
+        ]
+
+    def _get_base_to_extend(self):
+        ### TODO Implement later
+        return self._get_empty_base()        
+        # if self._base_file_contents is None:
+        #     if self._base_filepath is None:
+        #         return self._get_empty_base()
+        #     with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file:
+        #         self._base_file_contents = json.load(base_file)
+        #     self.check_base_file_contents()
+        # return self._base_file_contents
+
+    def _get_empty_base(self):
+        return {
+            "nlu": list()
+        }
\ No newline at end of file

From 62a233ec852ed83701378f4bdbbbc5d7707f87a0 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Fri, 26 Mar 2021 11:58:22 +0900
Subject: [PATCH 16/24] fixed wrong logic

---
 chatette/parsing/parser.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chatette/parsing/parser.py b/chatette/parsing/parser.py
index cd654b1a..3707d3ce 100644
--- a/chatette/parsing/parser.py
+++ b/chatette/parsing/parser.py
@@ -404,6 +404,7 @@ def _parse_rule(self, tokens):
                 current_builder = None
                 leading_space = False
             elif token.type == TerminalType.slot_ref_end:
+                # checking annotation after slot reference
                 rolegroup_annotation, i = self._check_for_annotations(tokens, i)
                 current_builder.slot_rolegroup = rolegroup_annotation
                 rule_contents.append(current_builder.create_concrete())
@@ -512,10 +513,9 @@ def _parse_choice(self, tokens):
         return rules
 
     def _check_for_annotations(self, tokens, i):
-
         if (
-            i == len(tokens)
-            and tokens[i+1].type != TerminalType.annotation_start
+            i+1 == len(tokens)
+            or tokens[i+1].type != TerminalType.annotation_start
         ):
             return None, i
         

From 398147ed8e7440f0e7b2ab28c7359486d690920e Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 24 May 2021 15:41:09 +0900
Subject: [PATCH 17/24] changed str format to support older python

---
 chatette/adapters/rasa_md.py  | 6 +++---
 chatette/adapters/rasa_yml.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/chatette/adapters/rasa_md.py b/chatette/adapters/rasa_md.py
index bd3702c3..cfb36994 100644
--- a/chatette/adapters/rasa_md.py
+++ b/chatette/adapters/rasa_md.py
@@ -79,11 +79,11 @@ def prepare_example(self, example):
             entity_annotation_text = ']{"entity": "' + entity.slot_name
             entity_text = result[entity._start_index:entity._start_index + entity._len]
             if entity_text != entity.value:
-                entity_annotation_text += f', "value": "{entity.value}'
+                entity_annotation_text += ', "value": "{}'.format(entity.value)
             if entity.role is not None:
-                entity_annotation_text += f', "role": "{entity.role}'
+                entity_annotation_text += ', "role": "{}'.format(entity.role)
             if entity.group is not None:
-                entity_annotation_text += f', "group": "{entity.group}'
+                entity_annotation_text += ', "group": "{}'.format(entity.group)
             result = \
                 result[:entity._start_index] + "[" + \
                 entity_text + entity_annotation_text + '"}' + \
diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py
index 4c88bc96..d8e14ba0 100644
--- a/chatette/adapters/rasa_yml.py
+++ b/chatette/adapters/rasa_yml.py
@@ -71,11 +71,11 @@ def prepare_example(self, example):
             entity_annotation_text = ']{"entity": "' + entity.slot_name
             entity_text = result[entity._start_index:entity._start_index + entity._len]
             if entity_text != entity.value:
-                entity_annotation_text += f', "value": "{entity.value}'
+                entity_annotation_text += ', "value": "{}'.format(entity.value)
             if entity.role is not None:
-                entity_annotation_text += f', "role": "{entity.role}'
+                entity_annotation_text += ', "role": "{}'.format(entity.role)
             if entity.group is not None:
-                entity_annotation_text += f', "group": "{entity.group}'
+                entity_annotation_text += ', "group": "{}'.format(entity.group)
             result = \
                 result[:entity._start_index] + "[" + \
                 entity_text + entity_annotation_text + '"}' + \

From 61d23bc26a21ed507a9847173fac73fe0cb80db7 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 24 May 2021 17:00:12 +0900
Subject: [PATCH 18/24] add base_file reading for yaml adapter

---
 chatette/adapters/rasa_yml.py | 103 ++++++++++++++++++++++++++++++----
 1 file changed, 91 insertions(+), 12 deletions(-)

diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py
index d8e14ba0..323a5c57 100644
--- a/chatette/adapters/rasa_yml.py
+++ b/chatette/adapters/rasa_yml.py
@@ -2,10 +2,15 @@
 import io
 from collections import OrderedDict
 import ruamel.yaml as yaml
+from ruamel.yaml.scalarstring import DoubleQuotedScalarString
+from ruamel.yaml.error import YAMLError
+from ruamel.yaml.constructor import DuplicateKeyError
 
 from chatette.adapters._base import Adapter
 from chatette.utils import append_to_list_in_dict, cast_to_unicode
 
+YAML_VERSION = (1, 2)
+
 def intent_dict_to_list_of_dict(data):
     list_data = []
     for key, values in data.items():
@@ -18,6 +23,18 @@ def intent_dict_to_list_of_dict(data):
 
     return list_data
 
+def fix_yaml_loader() -> None:
+    """Ensure that any string read by yaml is represented as unicode."""
+    """Code from Rasa yaml reader"""
+    def construct_yaml_str(self, node):
+        # Override the default string handling function
+        # to always return unicode objects
+        return self.construct_scalar(node)
+
+    yaml.Loader.add_constructor("tag:yaml.org,2002:str", construct_yaml_str)
+    yaml.SafeLoader.add_constructor("tag:yaml.org,2002:str", construct_yaml_str)
+
+
 class RasaYMLAdapter(Adapter):
     def __init__(self, base_filepath=None):
         super(RasaYMLAdapter, self).__init__(base_filepath, None)
@@ -94,18 +111,80 @@ def __format_synonyms(cls, synonyms):
             if len(synonyms[slot_name]) > 1
         ]
 
+    def _read_yaml(self, content):
+        """Parse `content` as YAML 1.2 (safe loader); duplicate keys are rejected."""
+        fix_yaml_loader()
+        yaml_parser = yaml.YAML(typ='safe')
+        yaml_parser.version = YAML_VERSION
+        yaml_parser.preserve_quotes = True
+        yaml_parser.allow_duplicate_keys = False
+        return yaml_parser.load(content)
+
     def _get_base_to_extend(self):
-        ### TODO Implement later
-        return self._get_empty_base()        
-        # if self._base_file_contents is None:
-        #     if self._base_filepath is None:
-        #         return self._get_empty_base()
-        #     with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file:
-        #         self._base_file_contents = json.load(base_file)
-        #     self.check_base_file_contents()
-        # return self._base_file_contents
+        if self._base_file_contents is None:
+            if self._base_filepath is None:
+                return self._get_empty_base()
+            with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file:
+                try:
+                    self._base_file_contents = self._read_yaml(base_file.read())
+                except (YAMLError, DuplicateKeyError) as e:
+                    raise YamlSyntaxException(self._base_filepath, e) 
+            self.check_base_file_contents()
+        return self._base_file_contents
 
     def _get_empty_base(self):
-        return {
-            "nlu": list()
-        }
\ No newline at end of file
+        base = OrderedDict()
+        base['version'] = DoubleQuotedScalarString('2.0')
+        base['nlu'] = list()
+        return base
+
+    def check_base_file_contents(self): 
+        """
+        Checks that `self._base_file_contents` contains well formatted NLU dictionary.
+        Throws a `SyntaxError` if the data is incorrect.
+        """
+        if self._base_file_contents is None:
+            return
+        if not isinstance(self._base_file_contents, dict):
+            self._base_file_contents = None
+            raise SyntaxError(
+                "Couldn't load valid data from base file '" + \
+                self._base_filepath + "'"
+            )
+        else:
+            if "nlu" not in self._base_file_contents:
+                self._base_file_contents = None
+                raise SyntaxError(
+                    "Expected 'nlu' as a root of base file '" + \
+                    self._base_filepath + "'")
+
+
+class YamlSyntaxException(Exception):
+    """Raised when a YAML file can not be parsed properly due to a syntax error."""
+    """code from rasa.shared.exceptions.YamlSyntaxException"""
+
+    def __init__(self, filename, underlying_yaml_exception):
+        self.filename = filename
+        self.underlying_yaml_exception = underlying_yaml_exception
+
+    def __str__(self):
+        if self.filename:
+            exception_text = "Failed to read '{}'.".format(self.filename)
+        else:
+            exception_text = "Failed to read YAML."
+
+        if self.underlying_yaml_exception:
+            self.underlying_yaml_exception.warn = None
+            self.underlying_yaml_exception.note = None
+            exception_text += " {}".format(self.underlying_yaml_exception)
+
+        if self.filename:
+            exception_text = exception_text.replace(
+                'in "<unicode string>"', 'in "{}"'.format(self.filename)
+            )
+
+        exception_text += (
+            "\n\nYou can use https://yamlchecker.com/ to validate the "
+            "YAML syntax of your file."
+        )
+        return exception_text
\ No newline at end of file

From d80f3000088a0e3ec1bd0dfc6ac30ef1c5e4cf0b Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 21 Jun 2021 17:00:52 +0900
Subject: [PATCH 19/24] add ruamel to requirements in setup.py

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index b4e99e1a..62d861ea 100644
--- a/setup.py
+++ b/setup.py
@@ -33,5 +33,6 @@
         "enum-compat",
         "future",
         "six",
+        "ruamel.yaml",
     ]
 )

From 3883d52c2de8173101a186c0bfb8eaef4736e1cc Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 21 Jun 2021 17:01:44 +0900
Subject: [PATCH 20/24] remove unnecessary unit_type assignment

---
 chatette/units/modifiable/unit_reference.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/chatette/units/modifiable/unit_reference.py b/chatette/units/modifiable/unit_reference.py
index fcc83e65..726746a1 100644
--- a/chatette/units/modifiable/unit_reference.py
+++ b/chatette/units/modifiable/unit_reference.py
@@ -137,7 +137,6 @@ def __init__(self, identifier, unit_type, leading_space, modifiers, rolegroup):
         super(SlotRoleGroupReference, self).__init__(
             identifier, unit_type, leading_space, modifiers
         )
-        self._unit_type = 'slot'
 
         # dictionary {"role": "value"}, or {"group": "value"}, or both
         self._role = rolegroup.get('role', None)

From c8e785565623fae12f12d3a403303f710822d83b Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 21 Jun 2021 17:02:17 +0900
Subject: [PATCH 21/24] add rolegroupreference unit testing

---
 tests/unit-testing/parsing/test_init.py | 28 ++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/tests/unit-testing/parsing/test_init.py b/tests/unit-testing/parsing/test_init.py
index bf791e68..e0a4ea4f 100644
--- a/tests/unit-testing/parsing/test_init.py
+++ b/tests/unit-testing/parsing/test_init.py
@@ -15,7 +15,7 @@
 
 from chatette.modifiers.representation import ModifiersRepresentation
 from chatette.units.modifiable.choice import Choice
-from chatette.units.modifiable.unit_reference import UnitReference
+from chatette.units.modifiable.unit_reference import UnitReference, SlotRoleGroupReference
 from chatette.units.modifiable.definitions.alias import AliasDefinition
 from chatette.units.modifiable.definitions.slot import SlotDefinition
 from chatette.units.modifiable.definitions.intent import IntentDefinition
@@ -90,6 +90,32 @@ def test_create_concrete(self):
         assert unit_ref._unit_type == UnitType.alias
         assert unit_ref._name == "id"
 
+    def test_create_concrete_rolegroup_ref(self):
+        builder = UnitRefBuilder()
+        builder.identifier = "id"
+
+        with pytest.raises(ValueError):
+            builder.create_concrete()
+
+        builder.type = UnitType.slot
+        modifiers = builder._build_modifiers_repr()
+        assert isinstance(modifiers, ModifiersRepresentation)
+        assert not modifiers.casegen
+        assert not modifiers.randgen
+        assert modifiers.randgen.name is None
+        assert modifiers.randgen.percentage == 50
+        assert not modifiers.randgen.opposite
+
+        annotation = {'role': 'role', 'group': 'group'}
+        builder.slot_rolegroup = annotation
+        unit_ref = builder.create_concrete()
+        assert isinstance(unit_ref, SlotRoleGroupReference)
+        assert not unit_ref._leading_space
+        assert unit_ref._unit_type == UnitType.slot
+        assert unit_ref._name == "id"
+        assert unit_ref._role == annotation['role']
+        assert unit_ref._group == annotation['group']
+
 class TestUnitDefBuilder(object):
     def test_creation(self):
         with pytest.raises(TypeError):

From 41eddc4f4f2449cc000d55e0aa9d0fd6883acf47 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 21 Jun 2021 17:02:35 +0900
Subject: [PATCH 22/24] add test system for rolegroup

---
 .../generate-all/slotrolegroup.chatette       | 25 +++++++++++++++++++
 .../generate-all/slotrolegroup.solution       | 22 ++++++++++++++++
 tests/system-testing/test_system.py           |  2 +-
 3 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 tests/system-testing/inputs/generate-all/slotrolegroup.chatette
 create mode 100644 tests/system-testing/inputs/generate-all/slotrolegroup.solution

diff --git a/tests/system-testing/inputs/generate-all/slotrolegroup.chatette b/tests/system-testing/inputs/generate-all/slotrolegroup.chatette
new file mode 100644
index 00000000..2900e8ea
--- /dev/null
+++ b/tests/system-testing/inputs/generate-all/slotrolegroup.chatette
@@ -0,0 +1,25 @@
+%[single_role]
+    @[slot]("role":"role")
+
+%[single_group]
+    @[slot]("group":"group")
+
+%[single_role_group]
+    @[slot]("role":"role", "group":"group")
+
+%[multiple_role]
+    @[slot]("role":"role1") @[slot]("role":"role2")
+
+%[multiple_group]
+    @[slot]("group":"group1") @[slot]("group":"group2")
+
+%[multiple_role_group]
+    @[slot]("role":"role1", "group":"group1") @[slot]("role":"role2", "group":"group2")
+    
+%[not_role_group]
+    @[slot]("role":"role"
+
+@[slot]
+    slot one
+    slot1
+
diff --git a/tests/system-testing/inputs/generate-all/slotrolegroup.solution b/tests/system-testing/inputs/generate-all/slotrolegroup.solution
new file mode 100644
index 00000000..d12aece8
--- /dev/null
+++ b/tests/system-testing/inputs/generate-all/slotrolegroup.solution
@@ -0,0 +1,22 @@
+# Contains all possible examples for the Chatette template file with the same name.
+single_role>>>slot one
+single_role>>>slot1
+single_group>>>slot one
+single_group>>>slot1
+single_role_group>>>slot one
+single_role_group>>>slot1
+multiple_role>>>slot one slot one
+multiple_role>>>slot one slot1
+multiple_role>>>slot1 slot one
+multiple_role>>>slot1 slot1
+multiple_group>>>slot one slot one
+multiple_group>>>slot one slot1
+multiple_group>>>slot1 slot one
+multiple_group>>>slot1 slot1
+multiple_role_group>>>slot one slot one
+multiple_role_group>>>slot one slot1
+multiple_role_group>>>slot1 slot one
+multiple_role_group>>>slot1 slot1
+
+not_role_group>>>slot one("role":"role"
+not_role_group>>>slot1("role":"role"
\ No newline at end of file
diff --git a/tests/system-testing/test_system.py b/tests/system-testing/test_system.py
index 5a25eefb..7744ec23 100644
--- a/tests/system-testing/test_system.py
+++ b/tests/system-testing/test_system.py
@@ -157,7 +157,7 @@ def test_generate_all_training(self):
         input_filenames = [
             "simplest.chatette", "only-words.chatette",
             "words-and-groups.chatette", "alias.chatette", "include.chatette",
-            "slot.chatette"
+            "slot.chatette", "slotrolegroup.chatette"
         ]
         for filename in input_filenames:
             file_path = os.path.join(input_dir_path, filename)

From 2dfdd0e9706d31837cb4077277d60307218c0787 Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Mon, 21 Jun 2021 17:16:08 +0900
Subject: [PATCH 23/24] added missing quotation to entity text

---
 chatette/adapters/rasa_md.py  | 6 +++---
 chatette/adapters/rasa_yml.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/chatette/adapters/rasa_md.py b/chatette/adapters/rasa_md.py
index cfb36994..a3434b96 100644
--- a/chatette/adapters/rasa_md.py
+++ b/chatette/adapters/rasa_md.py
@@ -79,11 +79,11 @@ def prepare_example(self, example):
             entity_annotation_text = ']{"entity": "' + entity.slot_name
             entity_text = result[entity._start_index:entity._start_index + entity._len]
             if entity_text != entity.value:
-                entity_annotation_text += ', "value": "{}'.format(entity.value)
+                entity_annotation_text += '", "value": "{}'.format(entity.value)
             if entity.role is not None:
-                entity_annotation_text += ', "role": "{}'.format(entity.role)
+                entity_annotation_text += '", "role": "{}'.format(entity.role)
             if entity.group is not None:
-                entity_annotation_text += ', "group": "{}'.format(entity.group)
+                entity_annotation_text += '", "group": "{}'.format(entity.group)
             result = \
                 result[:entity._start_index] + "[" + \
                 entity_text + entity_annotation_text + '"}' + \
diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py
index 323a5c57..49709b44 100644
--- a/chatette/adapters/rasa_yml.py
+++ b/chatette/adapters/rasa_yml.py
@@ -88,11 +88,11 @@ def prepare_example(self, example):
             entity_annotation_text = ']{"entity": "' + entity.slot_name
             entity_text = result[entity._start_index:entity._start_index + entity._len]
             if entity_text != entity.value:
-                entity_annotation_text += ', "value": "{}'.format(entity.value)
+                entity_annotation_text += '", "value": "{}'.format(entity.value)
             if entity.role is not None:
-                entity_annotation_text += ', "role": "{}'.format(entity.role)
+                entity_annotation_text += '", "role": "{}'.format(entity.role)
             if entity.group is not None:
-                entity_annotation_text += ', "group": "{}'.format(entity.group)
+                entity_annotation_text += '", "group": "{}'.format(entity.group)
             result = \
                 result[:entity._start_index] + "[" + \
                 entity_text + entity_annotation_text + '"}' + \

From 29f7109633cfa93a6085b07b6807ab1eb9ac276b Mon Sep 17 00:00:00 2001
From: Tommy Gunawan <tomgun132@gmail.com>
Date: Thu, 12 Aug 2021 18:49:54 +0900
Subject: [PATCH 24/24] changed OrderedDict to ruamel CommentedMap

---
 chatette/adapters/rasa_yml.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py
index 49709b44..8b5acc07 100644
--- a/chatette/adapters/rasa_yml.py
+++ b/chatette/adapters/rasa_yml.py
@@ -1,11 +1,10 @@
 import os
 import io
-from collections import OrderedDict
 import ruamel.yaml as yaml
 from ruamel.yaml.scalarstring import DoubleQuotedScalarString
 from ruamel.yaml.error import YAMLError
 from ruamel.yaml.constructor import DuplicateKeyError
-
+from ruamel.yaml.comments import CommentedMap as OrderedDict
 from chatette.adapters._base import Adapter
 from chatette.utils import append_to_list_in_dict, cast_to_unicode