From 5b1cd72ecf0f49903d3af7025a9325bf3a0f3286 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Wed, 25 Mar 2020 15:53:14 +0900 Subject: [PATCH 01/24] added encoding utf-8 to io --- chatette/adapters/_base.py | 2 +- chatette/adapters/rasa.py | 2 +- chatette/parsing/line_count_file_wrapper.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chatette/adapters/_base.py b/chatette/adapters/_base.py index 71c7900d..8ae57785 100644 --- a/chatette/adapters/_base.py +++ b/chatette/adapters/_base.py @@ -54,7 +54,7 @@ def write(self, output_directory, examples, synonyms): self.__get_file_name( batch, output_directory, single_file_output ) - with io.open(output_file_path, 'w') as output_file: + with io.open(output_file_path, 'w', encoding='utf-8') as output_file: self._write_batch(output_file, batch) @classmethod diff --git a/chatette/adapters/rasa.py b/chatette/adapters/rasa.py index c5b63570..f6ce655b 100644 --- a/chatette/adapters/rasa.py +++ b/chatette/adapters/rasa.py @@ -68,7 +68,7 @@ def _get_base_to_extend(self): if self._base_file_contents is None: if self._base_filepath is None: return self._get_empty_base() - with io.open(self._base_filepath, 'r') as base_file: + with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file: self._base_file_contents = json.load(base_file) self.check_base_file_contents() return self._base_file_contents diff --git a/chatette/parsing/line_count_file_wrapper.py b/chatette/parsing/line_count_file_wrapper.py index ace354f1..60fd29f1 100644 --- a/chatette/parsing/line_count_file_wrapper.py +++ b/chatette/parsing/line_count_file_wrapper.py @@ -15,7 +15,7 @@ class LineCountFileWrapper(object): def __init__(self, filepath, mode='r'): self.name = cast_to_unicode(filepath) - self.f = io.open(filepath, mode) + self.f = io.open(filepath, mode, encoding='utf-8') self.line_nb = 0 def close(self): From eef5212d698781ba86055b0857b75ceda8448d1c Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 17 Aug 2020 15:40:33 +0900 Subject: [PATCH 02/24] added rasa YAML adapter --- chatette/__main__.py | 2 +- chatette/adapters/factory.py | 3 + chatette/adapters/rasa_yml.py | 104 ++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 chatette/adapters/rasa_yml.py diff --git a/chatette/__main__.py b/chatette/__main__.py index 08be094f..28a9f121 100644 --- a/chatette/__main__.py +++ b/chatette/__main__.py @@ -75,7 +75,7 @@ def _add_optional_arguments(argument_parser): argument_parser.add_argument( "-a", "--adapter", dest="adapter", required=False, type=str, default="rasa", - help="Write adapter. Possible values: ['rasa', 'jsonl', 'rasamd']" + help="Write adapter. Possible values: ['rasa', 'jsonl', 'rasamd', 'rasayml']" ) argument_parser.add_argument( "--base-file", dest="base_filepath", diff --git a/chatette/adapters/factory.py b/chatette/adapters/factory.py index d7426c7b..7b71246b 100644 --- a/chatette/adapters/factory.py +++ b/chatette/adapters/factory.py @@ -6,6 +6,7 @@ from chatette.adapters.jsonl import JsonListAdapter from chatette.adapters.rasa import RasaAdapter from chatette.adapters.rasa_md import RasaMdAdapter +from chatette.adapters.rasa_yml import RasaYMLAdapter def create_adapter(adapter_name, base_filepath=None): @@ -23,6 +24,8 @@ def create_adapter(adapter_name, base_filepath=None): return RasaAdapter(base_filepath) elif adapter_name in ('rasa-md', 'rasamd'): return RasaMdAdapter(base_filepath) + elif adapter_name in ('rasa-yml', 'rasayml'): + return RasaYMLAdapter(base_filepath) elif adapter_name == 'jsonl': return JsonListAdapter(base_filepath) raise ValueError("Unknown adapter was selected.") diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py new file mode 100644 index 00000000..d781775c --- /dev/null +++ b/chatette/adapters/rasa_yml.py @@ -0,0 +1,104 @@ +import os +import io +from collections import OrderedDict +import ruamel.yaml as yaml + +from chatette.adapters._base import Adapter +from chatette.utils import append_to_list_in_dict, cast_to_unicode + +def intent_dict_to_list_of_dict(data): + list_data = [] + for key, values in data.items(): + list_data.append( + { + "intent": key, + "examples": '\n'.join(['- ' + v for v in values]) + '\n' + } + ) + + return list_data + +class RasaYMLAdapter(Adapter): + def __init__(self, base_filepath=None): + super(RasaYMLAdapter, self).__init__(base_filepath, None) + self._base_file_contents = None + + @classmethod + def _get_file_extension(cls): + return "yml" + + def __get_file_name(self, batch, output_directory, single_file): + if single_file: + return \ + os.path.join( + output_directory, "nlu." + self._get_file_extension() + ) + raise ValueError( + "Tried to generate several files with Rasa YAML adapter." + ) + + def _write_batch(self, output_file_handle, batch): + data = self._get_base_to_extend() + prepared_examples = dict() + for example in batch.examples: + append_to_list_in_dict( + prepared_examples, + example.intent_name, self.prepare_example(example) + ) + prepared_examples = intent_dict_to_list_of_dict(prepared_examples) + prepared_examples.extend( + self.__format_synonyms(batch.synonyms) + ) + data['nlu'] = prepared_examples + data = cast_to_unicode(data) + + yaml.scalarstring.walk_tree(data) + yaml.round_trip_dump(data, output_file_handle, default_flow_style=False, allow_unicode=True) + + + def prepare_example(self, example): + if len(example.entities) == 0: + return example.text + + sorted_entities = \ + sorted( + example.entities, + reverse=True, + key=lambda entity: entity._start_index + ) + result = example.text[:] + for entity in sorted_entities: + result = \ + result[:entity._start_index] + "[" + \ + result[entity._start_index:entity._start_index + entity._len] + \ + ']{"entity": "' + entity.slot_name + '"}' + \ + result[entity._start_index + entity._len:] # New rasa entity format + return result + + @classmethod + def __format_synonyms(cls, synonyms): + # {str: [str]} -> [{"value": str, "synonyms": [str]}] + return [ + { + "synonym": slot_name, + "examples": '\n'.join(['- ' + s for s in synonyms[slot_name]]) + '\n' + } + for slot_name in synonyms + if len(synonyms[slot_name]) > 1 + ] + + def _get_base_to_extend(self): + ### TODO Implement later + return self._get_empty_base() + # if self._base_file_contents is None: + # if self._base_filepath is None: + # return self._get_empty_base() + # with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file: + # self._base_file_contents = json.load(base_file) + # self.check_base_file_contents() + # return self._base_file_contents + + def _get_empty_base(self): + return { + "nlu": list() + } \ No newline at end of file From 02e72f55c8f4fb36a7287dc73e934c7e797046b5 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 8 Mar 2021 16:47:07 +0900 Subject: [PATCH 03/24] add annotation after slot in parser --- chatette/parsing/lexing/rule_unit_ref.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/chatette/parsing/lexing/rule_unit_ref.py b/chatette/parsing/lexing/rule_unit_ref.py index 9a0a3c8f..f692dfad 100644 --- a/chatette/parsing/lexing/rule_unit_ref.py +++ b/chatette/parsing/lexing/rule_unit_ref.py @@ -11,6 +11,7 @@ extract_identifier, \ CASE_GEN_SYM, UNIT_END_SYM +from chatette.parsing.lexing.rule_annotation import RuleAnnotation from chatette.parsing.lexing.rule_unit_start import RuleUnitStart from chatette.parsing.lexing.rule_variation import RuleVariation from chatette.parsing.lexing.rule_rand_gen import RuleRandGen @@ -55,11 +56,13 @@ def _apply_strategy(self, **kwargs): "using character '" + UNIT_END_SYM + "')." return False + is_slot = False # TODO maybe making a function for this would be useful if self._tokens[0].type == TerminalType.alias_ref_start: unit_end_type = TerminalType.alias_ref_end elif self._tokens[0].type == TerminalType.slot_ref_start: unit_end_type = TerminalType.slot_ref_end + is_slot = True elif self._tokens[0].type == TerminalType.intent_ref_start: unit_end_type = TerminalType.intent_ref_end else: # Should never happen @@ -72,5 +75,13 @@ def _apply_strategy(self, **kwargs): self._next_index += 1 self._update_furthest_matched_index() self._tokens.append(LexicalToken(unit_end_type, UNIT_END_SYM)) - + + # This is for adding new rasa training mode that has role and group entity + # Reference: https://rasa.com/docs/rasa/nlu-training-data/#entities-roles-and-groups + annotation_rule = RuleAnnotation(self._text, self._next_index) + if is_slot and annotation_rule.matches(): + self._next_index = annotation_rule.get_next_index_to_match() + self._update_furthest_matched_index() + self._tokens.extend(annotation_rule.get_lexical_tokens()) + return True From 95bdfa4bd4260aa6cba8c66f7c57dfa720c45def Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Tue, 9 Mar 2021 15:24:38 +0900 Subject: [PATCH 04/24] add annotation check in parser --- chatette/parsing/parser.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/chatette/parsing/parser.py b/chatette/parsing/parser.py index e2563bdf..ae2c9906 100644 --- a/chatette/parsing/parser.py +++ b/chatette/parsing/parser.py @@ -505,3 +505,26 @@ def _parse_choice(self, tokens): ) return rules + + def _check_for_annotations(self, tokens, i): + + if ( + i == len(tokens) + and tokens[i+1].type != TerminalType.annotation_start + ): + return None, i-1 + + end_annotation_idx = 0 + for j, token in enumerate(tokens[i:]): + if token.type == TerminalType.annotation_end: + end_annotation_idx = i+j + + if end_annotation_idx > i: + annotation_tokens = tokens[i:end_annotation_idx+1] + annotation = self._annotation_tokens_to_dict(annotation_tokens) + return annotation, end_annotation_idx + else: + raise ValueError( # Should never happen + "Something wrong happens when parsing annotation" +\ + "for entity role or group." + ) From 70af8c8d287c5500bf1a005023c3ec905ad90a9a Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Wed, 10 Mar 2021 11:08:58 +0900 Subject: [PATCH 05/24] add rolegroup annotation check --- chatette/parsing/parser.py | 40 +++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/chatette/parsing/parser.py b/chatette/parsing/parser.py index ae2c9906..cd654b1a 100644 --- a/chatette/parsing/parser.py +++ b/chatette/parsing/parser.py @@ -30,7 +30,7 @@ from chatette.units.rule import Rule from chatette.parsing import \ - ChoiceBuilder, UnitRefBuilder, \ + ChoiceBuilder, UnitRefBuilder,\ AliasDefBuilder, SlotDefBuilder, IntentDefBuilder @@ -398,12 +398,17 @@ def _parse_rule(self, tokens): elif ( token.type in \ (TerminalType.alias_ref_end, - TerminalType.slot_ref_end, TerminalType.intent_ref_end) ): rule_contents.append(current_builder.create_concrete()) current_builder = None leading_space = False + elif token.type == TerminalType.slot_ref_end: + rolegroup_annotation, i = self._check_for_annotations(tokens, i) + current_builder.slot_rolegroup = rolegroup_annotation + rule_contents.append(current_builder.create_concrete()) + current_builder = None + leading_space = False elif token.type == TerminalType.unit_identifier: current_builder.identifier = token.text elif token.type == TerminalType.choice_start: @@ -512,19 +517,22 @@ def _check_for_annotations(self, tokens, i): i == len(tokens) and tokens[i+1].type != TerminalType.annotation_start ): - return None, i-1 + return None, i - end_annotation_idx = 0 - for j, token in enumerate(tokens[i:]): + annotation = {} + current_key = None + for j, token in enumerate(tokens[i+1:]): if token.type == TerminalType.annotation_end: - end_annotation_idx = i+j - - if end_annotation_idx > i: - annotation_tokens = tokens[i:end_annotation_idx+1] - annotation = self._annotation_tokens_to_dict(annotation_tokens) - return annotation, end_annotation_idx - else: - raise ValueError( # Should never happen - "Something wrong happens when parsing annotation" +\ - "for entity role or group." - ) + i += j+1 + break + elif token.type == TerminalType.key: + current_key = token.text + elif token.type == TerminalType.value: + if current_key in annotation: + self.input_file_manager.syntax_error( + "Annotation contained the key '" + current_key + \ + "' twice." + ) + annotation[current_key] = token.text + + return annotation, i From 479cc3214e9625bbd0082db56120246a8a1ed913 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Wed, 10 Mar 2021 11:09:15 +0900 Subject: [PATCH 06/24] add comment --- chatette/parsing/lexing/rule_unit_ref.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chatette/parsing/lexing/rule_unit_ref.py b/chatette/parsing/lexing/rule_unit_ref.py index f692dfad..3a7f1e4d 100644 --- a/chatette/parsing/lexing/rule_unit_ref.py +++ b/chatette/parsing/lexing/rule_unit_ref.py @@ -79,6 +79,8 @@ def _apply_strategy(self, **kwargs): # This is for adding new rasa training mode that has role and group entity # Reference: https://rasa.com/docs/rasa/nlu-training-data/#entities-roles-and-groups annotation_rule = RuleAnnotation(self._text, self._next_index) + + # ? Should we raise error if RuleAnnotation doesn't match, i.e. wrong pattern if is_slot and annotation_rule.matches(): self._next_index = annotation_rule.get_next_index_to_match() self._update_furthest_matched_index() From ceb9da341cf232ef7f0cfbe5380151a886311b7d Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Wed, 10 Mar 2021 11:09:42 +0900 Subject: [PATCH 07/24] added choice to choose unitref --- chatette/parsing/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/chatette/parsing/__init__.py b/chatette/parsing/__init__.py index f570877c..753b1c46 100644 --- a/chatette/parsing/__init__.py +++ b/chatette/parsing/__init__.py @@ -16,7 +16,7 @@ from future.utils import with_metaclass from chatette.units.modifiable.choice import Choice -from chatette.units.modifiable.unit_reference import UnitReference +from chatette.units.modifiable.unit_reference import UnitReference, SlotRoleGroupReference from chatette.units.modifiable.definitions.alias import AliasDefinition from chatette.units.modifiable.definitions.slot import SlotDefinition from chatette.units.modifiable.definitions.intent import IntentDefinition @@ -91,6 +91,7 @@ def __init__(self): self.identifier = None self.variation = None self.arg_value = None + self.slot_rolegroup = None def _check_information(self): super(UnitRefBuilder, self)._check_information() @@ -108,6 +109,12 @@ def _build_modifiers_repr(self): def create_concrete(self): self._check_information() + if self.slot_rolegroup is not None: + return SlotRoleGroupReference( + self.identifier, self.type, + self.leading_space, self._build_modifiers_repr(), + self.slot_rolegroup + ) return UnitReference( self.identifier, self.type, self.leading_space, self._build_modifiers_repr() From 3d34c2594843e81990bd91e81560523cfca32775 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Wed, 10 Mar 2021 11:10:01 +0900 Subject: [PATCH 08/24] added new slotref class for rolegroup --- chatette/units/modifiable/unit_reference.py | 51 +++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/chatette/units/modifiable/unit_reference.py b/chatette/units/modifiable/unit_reference.py index 02b1c5e6..fcc83e65 100644 --- a/chatette/units/modifiable/unit_reference.py +++ b/chatette/units/modifiable/unit_reference.py @@ -126,3 +126,54 @@ def as_template_str(self): if self._leading_space: result = ' ' + result return result + + +class SlotRoleGroupReference(UnitReference): + """ + Represents a reference to a unit definition that can be contained + in a template rule. + """ + def __init__(self, identifier, unit_type, leading_space, modifiers, rolegroup): + super(SlotRoleGroupReference, self).__init__( + identifier, unit_type, leading_space, modifiers + ) + self._unit_type = 'slot' + + # dictionary {"role": "value"}, or {"group": "value"}, or both + self._role = rolegroup.get('role', None) + self._group = rolegroup.get('group', None) + + def _generate_random_strategy(self): + generated_example = super()._generate_random_strategy() + + for ent in generated_example.entities: + if self._role is not None: + ent.role = self._role + if self._group is not None: + ent.group = self._group + return generated_example + + + def _generate_all_strategy(self): + generated_examples = super()._generate_all_strategy() + + for ex in generated_examples: + for ent in ex.entities: + if self._role is not None: + ent.role = self._role + if self._group is not None: + ent.group = self._group + + return generated_examples + + def _generate_n_strategy(self, n): + generated_examples = super()._generate_n_strategy() + + for ex in generated_examples: + for ent in ex.entities: + if self._role is not None: + ent.role = self._role + if self._group is not None: + ent.group = self._group + + return generated_examples \ No newline at end of file From f4531355b7f00fee8cabf14421b821e17595a930 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Wed, 10 Mar 2021 11:10:19 +0900 Subject: [PATCH 09/24] added role group to Entity unit --- chatette/units/__init__.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/chatette/units/__init__.py b/chatette/units/__init__.py index bfbd3ff0..fb5070ae 100644 --- a/chatette/units/__init__.py +++ b/chatette/units/__init__.py @@ -125,11 +125,13 @@ class Entity(object): Represents an entity as it will be contained in examples (instances of `Example`). """ - def __init__(self, name, length, value=None, start_index=0): + def __init__(self, name, length, value=None, start_index=0, role=None, group=None): self.slot_name = name # name of the entity (not the associated text) self.value = value self._len = length self._start_index = start_index + self.role = role + self.group = group def _remove_leading_space(self): """ @@ -146,17 +148,27 @@ def _remove_leading_space(self): return True def as_dict(self): - return { + entity_dict = { "slot-name": self.slot_name, "value": self.value, "start-index": self._start_index, "end-index": self._start_index + self._len } + if self.role is not None: + entity_dict['role'] = self.role + if self.group is not None: + entity_dict['group'] = self.group + return entity_dict def __repr__(self): representation = "entity '" + self.slot_name + "'" if self.value is not None: representation += ":'" + self.value + "'" + # ? There might be better representation format? + if self.role is not None: + representation += ":'" + self.role + "'" + if self.group is not None: + representation += ":'" + self.group + "'" return representation def __str__(self): return \ From 7a34afef4707b7f7ee616538a28647f124aaa718 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Wed, 10 Mar 2021 13:20:50 +0900 Subject: [PATCH 10/24] fixed entity repr with rolegroup --- chatette/units/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chatette/units/__init__.py b/chatette/units/__init__.py index fb5070ae..e22a46d3 100644 --- a/chatette/units/__init__.py +++ b/chatette/units/__init__.py @@ -166,9 +166,9 @@ def __repr__(self): representation += ":'" + self.value + "'" # ? There might be better representation format? if self.role is not None: - representation += ":'" + self.role + "'" + representation += ", 'role' :'" + self.role + "'" if self.group is not None: - representation += ":'" + self.group + "'" + representation += ", 'group' :'" + self.group + "'" return representation def __str__(self): return \ From 5e62dcf1388576069abd9093e6e17bc1232c889b Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 15 Mar 2021 19:03:04 +0900 Subject: [PATCH 11/24] added role,group,value format to adapter --- chatette/adapters/rasa.py | 8 +++++++- chatette/adapters/rasa_md.py | 15 +++++++++++---- chatette/adapters/rasa_yml.py | 11 +++++++++-- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/chatette/adapters/rasa.py b/chatette/adapters/rasa.py index f6ce655b..2ffe4547 100644 --- a/chatette/adapters/rasa.py +++ b/chatette/adapters/rasa.py @@ -38,12 +38,18 @@ def _write_batch(self, output_file_handle, batch): def prepare_example(self, example): def entity_to_rasa(entity): - return { + entity_dict = { "entity": entity.slot_name, "value": entity.value, "start": entity._start_index, "end": entity._start_index + entity._len, } + if entity.role is not None: + entity_dict['role'] = entity.role + if entity.group is not None: + entity_dict['group'] = entity.group + + return entity_dict return { "intent": example.intent_name, diff --git a/chatette/adapters/rasa_md.py b/chatette/adapters/rasa_md.py index a92a3dc6..bd3702c3 100644 --- a/chatette/adapters/rasa_md.py +++ b/chatette/adapters/rasa_md.py @@ -76,11 +76,18 @@ def prepare_example(self, example): ) result = example.text[:] for entity in sorted_entities: + entity_annotation_text = ']{"entity": "' + entity.slot_name + entity_text = result[entity._start_index:entity._start_index + entity._len] + if entity_text != entity.value: + entity_annotation_text += f', "value": "{entity.value}' + if entity.role is not None: + entity_annotation_text += f', "role": "{entity.role}' + if entity.group is not None: + entity_annotation_text += f', "group": "{entity.group}' result = \ result[:entity._start_index] + "[" + \ - result[entity._start_index:entity._start_index + entity._len] + \ - "](" + entity.slot_name + ")" + \ - result[entity._start_index + entity._len:] + entity_text + entity_annotation_text + '"}' + \ + result[entity._start_index + entity._len:] # New rasa entity format return result @@ -105,7 +112,7 @@ def _get_base_to_extend(self): if self._base_file_contents is None: if self._base_filepath is None: return self._get_empty_base() - with io.open(self._base_filepath, 'r') as base_file: + with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file: self._base_file_contents = ''.join(base_file.readlines()) self.check_base_file_contents() return self._base_file_contents diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py index d781775c..4c88bc96 100644 --- a/chatette/adapters/rasa_yml.py +++ b/chatette/adapters/rasa_yml.py @@ -68,10 +68,17 @@ def prepare_example(self, example): ) result = example.text[:] for entity in sorted_entities: + entity_annotation_text = ']{"entity": "' + entity.slot_name + entity_text = result[entity._start_index:entity._start_index + entity._len] + if entity_text != entity.value: + entity_annotation_text += f', "value": "{entity.value}' + if entity.role is not None: + entity_annotation_text += f', "role": "{entity.role}' + if entity.group is not None: + entity_annotation_text += f', "group": "{entity.group}' result = \ result[:entity._start_index] + "[" + \ - result[entity._start_index:entity._start_index + entity._len] + \ - ']{"entity": "' + entity.slot_name + '"}' + \ + entity_text + entity_annotation_text + '"}' + \ result[entity._start_index + entity._len:] # New rasa entity format return result From 2d9ee0bb49ae706191af57f7e0864d7a11c6707d Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 15 Mar 2021 19:03:36 +0900 Subject: [PATCH 12/24] edited example to test role,group --- examples/simple/airport/aliases.chatette | 4 ++-- examples/simple/airport/slots/cities.chatette | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/simple/airport/aliases.chatette b/examples/simple/airport/aliases.chatette index 4e5fa41e..12ccfd89 100644 --- a/examples/simple/airport/aliases.chatette +++ b/examples/simple/airport/aliases.chatette @@ -23,6 +23,6 @@ register ~[from airport] - from @[source-airport] + from @[airport#source]("role":"source") ~[to airport] - [to go?] to @[source-airport] + [to go?] to @[airport#dest]('role':'destination') diff --git a/examples/simple/airport/slots/cities.chatette b/examples/simple/airport/slots/cities.chatette index 526d6877..a69bd5fb 100644 --- a/examples/simple/airport/slots/cities.chatette +++ b/examples/simple/airport/slots/cities.chatette @@ -1,10 +1,10 @@ // Lists of cities that are available as source airports and destination airports -@[source-airport] +@[airport#source] Brussels Paris Amsterdam -@[destination-airport] +@[airport#dest] Paris Amsterdam London From 43090952265627c23769e62cc04e783698ae2b61 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 15 Mar 2021 19:03:51 +0900 Subject: [PATCH 13/24] added ruamel.yml to reqs --- requirements/common.txt | 1 + requirements/develop.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/common.txt b/requirements/common.txt index ea4305f6..42892472 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -1,3 +1,4 @@ enum-compat future six +ruamel.yaml diff --git a/requirements/develop.txt b/requirements/develop.txt index cd362625..b1d16c2b 100644 --- a/requirements/develop.txt +++ b/requirements/develop.txt @@ -1,4 +1,4 @@ -r test.txt tox pylint - +ruamel.yaml From b52beef30069558cb6bfa432f24e4b4320f7792e Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Wed, 25 Mar 2020 15:53:14 +0900 Subject: [PATCH 14/24] added encoding utf-8 to io --- chatette/adapters/_base.py | 2 +- chatette/adapters/rasa.py | 2 +- chatette/parsing/line_count_file_wrapper.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chatette/adapters/_base.py b/chatette/adapters/_base.py index 2bd056f5..1e8c4139 100644 --- a/chatette/adapters/_base.py +++ b/chatette/adapters/_base.py @@ -53,7 +53,7 @@ def write(self, output_directory, examples, synonyms): self.__get_file_name( batch, output_directory, single_file_output ) - with io.open(output_file_path, 'w') as output_file: + with io.open(output_file_path, 'w', encoding='utf-8') as output_file: self._write_batch(output_file, batch) @classmethod diff --git a/chatette/adapters/rasa.py b/chatette/adapters/rasa.py index c5b63570..f6ce655b 100644 --- a/chatette/adapters/rasa.py +++ b/chatette/adapters/rasa.py @@ -68,7 +68,7 @@ def _get_base_to_extend(self): if self._base_file_contents is None: if self._base_filepath is None: return self._get_empty_base() - with io.open(self._base_filepath, 'r') as base_file: + with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file: self._base_file_contents = json.load(base_file) self.check_base_file_contents() return self._base_file_contents diff --git a/chatette/parsing/line_count_file_wrapper.py b/chatette/parsing/line_count_file_wrapper.py index ace354f1..60fd29f1 100644 --- a/chatette/parsing/line_count_file_wrapper.py +++ b/chatette/parsing/line_count_file_wrapper.py @@ -15,7 +15,7 @@ class LineCountFileWrapper(object): def __init__(self, filepath, mode='r'): self.name = cast_to_unicode(filepath) - self.f = io.open(filepath, mode) + self.f = io.open(filepath, mode, encoding='utf-8') self.line_nb = 0 def close(self): From aba7c75b6e0239fdc1b800983e55b6a54d313b06 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 17 Aug 2020 15:40:33 +0900 Subject: [PATCH 15/24] added rasa YAML adapter --- chatette/__main__.py | 3 +- chatette/adapters/factory.py | 3 + chatette/adapters/rasa_yml.py | 104 ++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 chatette/adapters/rasa_yml.py diff --git a/chatette/__main__.py b/chatette/__main__.py index 3379ba94..0d558d43 100644 --- a/chatette/__main__.py +++ b/chatette/__main__.py @@ -75,8 +75,7 @@ def _add_optional_arguments(argument_parser): argument_parser.add_argument( "-a", "--adapter", dest="adapter", required=False, type=str, default="rasa", - help="Write adapter. " + \ - "Possible values: ['rasa', 'rasamd' or 'rasa-md', 'jsonl']" + help="Write adapter. Possible values: ['rasa', 'jsonl', 'rasamd' or 'rasa-md', 'rasayml']" ) argument_parser.add_argument( "--base-file", dest="base_filepath", diff --git a/chatette/adapters/factory.py b/chatette/adapters/factory.py index d7426c7b..7b71246b 100644 --- a/chatette/adapters/factory.py +++ b/chatette/adapters/factory.py @@ -6,6 +6,7 @@ from chatette.adapters.jsonl import JsonListAdapter from chatette.adapters.rasa import RasaAdapter from chatette.adapters.rasa_md import RasaMdAdapter +from chatette.adapters.rasa_yml import RasaYMLAdapter def create_adapter(adapter_name, base_filepath=None): @@ -23,6 +24,8 @@ def create_adapter(adapter_name, base_filepath=None): return RasaAdapter(base_filepath) elif adapter_name in ('rasa-md', 'rasamd'): return RasaMdAdapter(base_filepath) + elif adapter_name in ('rasa-yml', 'rasayml'): + return RasaYMLAdapter(base_filepath) elif adapter_name == 'jsonl': return JsonListAdapter(base_filepath) raise ValueError("Unknown adapter was selected.") diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py new file mode 100644 index 00000000..d781775c --- /dev/null +++ b/chatette/adapters/rasa_yml.py @@ -0,0 +1,104 @@ +import os +import io +from collections import OrderedDict +import ruamel.yaml as yaml + +from chatette.adapters._base import Adapter +from chatette.utils import append_to_list_in_dict, cast_to_unicode + +def intent_dict_to_list_of_dict(data): + list_data = [] + for key, values in data.items(): + list_data.append( + { + "intent": key, + "examples": '\n'.join(['- ' + v for v in values]) + '\n' + } + ) + + return list_data + +class RasaYMLAdapter(Adapter): + def __init__(self, base_filepath=None): + super(RasaYMLAdapter, self).__init__(base_filepath, None) + self._base_file_contents = None + + @classmethod + def _get_file_extension(cls): + return "yml" + + def __get_file_name(self, batch, output_directory, single_file): + if single_file: + return \ + os.path.join( + output_directory, "nlu." + self._get_file_extension() + ) + raise ValueError( + "Tried to generate several files with Rasa YAML adapter." + ) + + def _write_batch(self, output_file_handle, batch): + data = self._get_base_to_extend() + prepared_examples = dict() + for example in batch.examples: + append_to_list_in_dict( + prepared_examples, + example.intent_name, self.prepare_example(example) + ) + prepared_examples = intent_dict_to_list_of_dict(prepared_examples) + prepared_examples.extend( + self.__format_synonyms(batch.synonyms) + ) + data['nlu'] = prepared_examples + data = cast_to_unicode(data) + + yaml.scalarstring.walk_tree(data) + yaml.round_trip_dump(data, output_file_handle, default_flow_style=False, allow_unicode=True) + + + def prepare_example(self, example): + if len(example.entities) == 0: + return example.text + + sorted_entities = \ + sorted( + example.entities, + reverse=True, + key=lambda entity: entity._start_index + ) + result = example.text[:] + for entity in sorted_entities: + result = \ + result[:entity._start_index] + "[" + \ + result[entity._start_index:entity._start_index + entity._len] + \ + ']{"entity": "' + entity.slot_name + '"}' + \ + result[entity._start_index + entity._len:] # New rasa entity format + return result + + @classmethod + def __format_synonyms(cls, synonyms): + # {str: [str]} -> [{"value": str, "synonyms": [str]}] + return [ + { + "synonym": slot_name, + "examples": '\n'.join(['- ' + s for s in synonyms[slot_name]]) + '\n' + } + for slot_name in synonyms + if len(synonyms[slot_name]) > 1 + ] + + def _get_base_to_extend(self): + ### TODO Implement later + return self._get_empty_base() + # if self._base_file_contents is None: + # if self._base_filepath is None: + # return self._get_empty_base() + # with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file: + # self._base_file_contents = json.load(base_file) + # self.check_base_file_contents() + # return self._base_file_contents + + def _get_empty_base(self): + return { + "nlu": list() + } \ No newline at end of file From 62a233ec852ed83701378f4bdbbbc5d7707f87a0 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Fri, 26 Mar 2021 11:58:22 +0900 Subject: [PATCH 16/24] fixed wrong logic --- chatette/parsing/parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chatette/parsing/parser.py b/chatette/parsing/parser.py index cd654b1a..3707d3ce 100644 --- a/chatette/parsing/parser.py +++ b/chatette/parsing/parser.py @@ -404,6 +404,7 @@ def _parse_rule(self, tokens): current_builder = None leading_space = False elif token.type == TerminalType.slot_ref_end: + # checking annotation after slot reference rolegroup_annotation, i = self._check_for_annotations(tokens, i) current_builder.slot_rolegroup = rolegroup_annotation rule_contents.append(current_builder.create_concrete()) @@ -512,10 +513,9 @@ def _parse_choice(self, tokens): return rules def _check_for_annotations(self, tokens, i): - if ( - i == len(tokens) - and tokens[i+1].type != TerminalType.annotation_start + i+1 == len(tokens) + or tokens[i+1].type != TerminalType.annotation_start ): return None, i From 398147ed8e7440f0e7b2ab28c7359486d690920e Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 24 May 2021 15:41:09 +0900 Subject: [PATCH 17/24] changed str format to support older python --- chatette/adapters/rasa_md.py | 6 +++--- chatette/adapters/rasa_yml.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chatette/adapters/rasa_md.py b/chatette/adapters/rasa_md.py index bd3702c3..cfb36994 100644 --- a/chatette/adapters/rasa_md.py +++ b/chatette/adapters/rasa_md.py @@ -79,11 +79,11 @@ def prepare_example(self, example): entity_annotation_text = ']{"entity": "' + entity.slot_name entity_text = result[entity._start_index:entity._start_index + entity._len] if entity_text != entity.value: - entity_annotation_text += f', "value": "{entity.value}' + entity_annotation_text += ', "value": "{}'.format(entity.value) if entity.role is not None: - entity_annotation_text += f', "role": "{entity.role}' + entity_annotation_text += ', "role": "{}'.format(entity.role) if entity.group is not None: - entity_annotation_text += f', "group": "{entity.group}' + entity_annotation_text += ', "group": "{}'.format(entity.group) result = \ result[:entity._start_index] + "[" + \ entity_text + entity_annotation_text + '"}' + \ diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py index 4c88bc96..d8e14ba0 100644 --- a/chatette/adapters/rasa_yml.py +++ b/chatette/adapters/rasa_yml.py @@ -71,11 +71,11 @@ def prepare_example(self, example): entity_annotation_text = ']{"entity": "' + entity.slot_name entity_text = result[entity._start_index:entity._start_index + entity._len] if entity_text != entity.value: - entity_annotation_text += f', "value": "{entity.value}' + entity_annotation_text += ', "value": "{}'.format(entity.value) if entity.role is not None: - entity_annotation_text += f', "role": "{entity.role}' + entity_annotation_text += ', "role": "{}'.format(entity.role) if entity.group is not None: - entity_annotation_text += f', "group": "{entity.group}' + entity_annotation_text += ', "group": "{}'.format(entity.group) result = \ result[:entity._start_index] + "[" + \ entity_text + entity_annotation_text + '"}' + \ From 61d23bc26a21ed507a9847173fac73fe0cb80db7 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 24 May 2021 17:00:12 +0900 Subject: [PATCH 18/24] add base_file reading for yaml adapter --- chatette/adapters/rasa_yml.py | 103 ++++++++++++++++++++++++++++++---- 1 file changed, 91 insertions(+), 12 deletions(-) diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py index d8e14ba0..323a5c57 100644 --- a/chatette/adapters/rasa_yml.py +++ b/chatette/adapters/rasa_yml.py @@ -2,10 +2,15 @@ import io from collections import OrderedDict import ruamel.yaml as yaml +from ruamel.yaml.scalarstring import DoubleQuotedScalarString +from ruamel.yaml.error import YAMLError +from ruamel.yaml.constructor import DuplicateKeyError from chatette.adapters._base import Adapter from chatette.utils import append_to_list_in_dict, cast_to_unicode +YAML_VERSION = (1, 2) + def intent_dict_to_list_of_dict(data): list_data = [] for key, values in data.items(): @@ -18,6 +23,18 @@ def intent_dict_to_list_of_dict(data): return list_data +def fix_yaml_loader() -> None: + """Ensure that any string read by yaml is represented as unicode.""" + """Code from Rasa yaml reader""" + def construct_yaml_str(self, node): + # Override the default string handling function + # to always return unicode objects + return self.construct_scalar(node) + + yaml.Loader.add_constructor("tag:yaml.org,2002:str", construct_yaml_str) + yaml.SafeLoader.add_constructor("tag:yaml.org,2002:str", construct_yaml_str) + + class RasaYMLAdapter(Adapter): def __init__(self, base_filepath=None): super(RasaYMLAdapter, self).__init__(base_filepath, None) @@ -94,18 +111,80 @@ def __format_synonyms(cls, synonyms): if len(synonyms[slot_name]) > 1 ] + def _read_yaml(self, content): + fix_yaml_loader() + yaml_parser = yaml.YAML(typ='safe') + yaml_parser.version = YAML_VERSION + yaml_parser.preserve_quotes = True + yaml.allow_duplicate_keys = False + + return yaml_parser.load(content) + def _get_base_to_extend(self): - ### TODO Implement later - return self._get_empty_base() - # if self._base_file_contents is None: - # if self._base_filepath is None: - # return self._get_empty_base() - # with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file: - # self._base_file_contents = json.load(base_file) - # self.check_base_file_contents() - # return self._base_file_contents + if self._base_file_contents is None: + if self._base_filepath is None: + return self._get_empty_base() + with io.open(self._base_filepath, 'r', encoding='utf-8') as base_file: + try: + self._base_file_contents = self._read_yaml(base_file.read()) + except (YAMLError, DuplicateKeyError) as e: + raise YamlSyntaxException(self._base_filepath, e) + self.check_base_file_contents() + return self._base_file_contents def _get_empty_base(self): - return { - "nlu": list() - } \ No newline at end of file + base = OrderedDict() + base['version'] = DoubleQuotedScalarString('2.0') + base['nlu'] = list() + return base + + def check_base_file_contents(self): + """ + Checks that `self._base_file_contents` contains well formatted NLU dictionary. + Throws a `SyntaxError` if the data is incorrect. + """ + if self._base_file_contents is None: + return + if not isinstance(self._base_file_contents, dict): + self._base_file_contents = None + raise SyntaxError( + "Couldn't load valid data from base file '" + \ + self._base_filepath + "'" + ) + else: + if "nlu" not in self._base_file_contents: + self._base_file_contents = None + raise SyntaxError( + "Expected 'nlu' as a root of base file '" + \ + self._base_filepath + "'") + + +class YamlSyntaxException(Exception): + """Raised when a YAML file can not be parsed properly due to a syntax error.""" + """code from rasa.shared.exceptions.YamlSyntaxException""" + + def __init__(self, filename, underlying_yaml_exception): + self.filename = filename + self.underlying_yaml_exception = underlying_yaml_exception + + def __str__(self): + if self.filename: + exception_text = "Failed to read '{}'.".format(self.filename) + else: + exception_text = "Failed to read YAML." + + if self.underlying_yaml_exception: + self.underlying_yaml_exception.warn = None + self.underlying_yaml_exception.note = None + exception_text += " {}".format(self.underlying_yaml_exception) + + if self.filename: + exception_text = exception_text.replace( + 'in ""', 'in "{}"'.format(self.filename) + ) + + exception_text += ( + "\n\nYou can use https://yamlchecker.com/ to validate the " + "YAML syntax of your file." + ) + return exception_text \ No newline at end of file From d80f3000088a0e3ec1bd0dfc6ac30ef1c5e4cf0b Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 21 Jun 2021 17:00:52 +0900 Subject: [PATCH 19/24] add ruamel to requirements in setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index b4e99e1a..62d861ea 100644 --- a/setup.py +++ b/setup.py @@ -33,5 +33,6 @@ "enum-compat", "future", "six", + "ruamel.yaml", ] ) From 3883d52c2de8173101a186c0bfb8eaef4736e1cc Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 21 Jun 2021 17:01:44 +0900 Subject: [PATCH 20/24] remove unnecessary unit_type assignment --- chatette/units/modifiable/unit_reference.py | 1 - 1 file changed, 1 deletion(-) diff --git a/chatette/units/modifiable/unit_reference.py b/chatette/units/modifiable/unit_reference.py index fcc83e65..726746a1 100644 --- a/chatette/units/modifiable/unit_reference.py +++ b/chatette/units/modifiable/unit_reference.py @@ -137,7 +137,6 @@ def __init__(self, identifier, unit_type, leading_space, modifiers, rolegroup): super(SlotRoleGroupReference, self).__init__( identifier, unit_type, leading_space, modifiers ) - self._unit_type = 'slot' # dictionary {"role": "value"}, or {"group": "value"}, or both self._role = rolegroup.get('role', None) From c8e785565623fae12f12d3a403303f710822d83b Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 21 Jun 2021 17:02:17 +0900 Subject: [PATCH 21/24] add rolegroupreference unit testing --- tests/unit-testing/parsing/test_init.py | 28 ++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/unit-testing/parsing/test_init.py b/tests/unit-testing/parsing/test_init.py index bf791e68..e0a4ea4f 100644 --- a/tests/unit-testing/parsing/test_init.py +++ b/tests/unit-testing/parsing/test_init.py @@ -15,7 +15,7 @@ from chatette.modifiers.representation import ModifiersRepresentation from chatette.units.modifiable.choice import Choice -from chatette.units.modifiable.unit_reference import UnitReference +from chatette.units.modifiable.unit_reference import UnitReference, SlotRoleGroupReference from chatette.units.modifiable.definitions.alias import AliasDefinition from chatette.units.modifiable.definitions.slot import SlotDefinition from chatette.units.modifiable.definitions.intent import IntentDefinition @@ -90,6 +90,32 @@ def test_create_concrete(self): assert unit_ref._unit_type == UnitType.alias assert unit_ref._name == "id" + def test_create_concrete_rolegroup_ref(self): + builder = UnitRefBuilder() + builder.identifier = "id" + + with pytest.raises(ValueError): + builder.create_concrete() + + builder.type = UnitType.slot + modifiers = builder._build_modifiers_repr() + assert isinstance(modifiers, ModifiersRepresentation) + assert not modifiers.casegen + assert not modifiers.randgen + assert modifiers.randgen.name is None + assert modifiers.randgen.percentage == 50 + assert not modifiers.randgen.opposite + + annotation = {'role': 'role', 'group': 'group'} + builder.slot_rolegroup = annotation + unit_ref = builder.create_concrete() + assert isinstance(unit_ref, SlotRoleGroupReference) + assert not unit_ref._leading_space + assert unit_ref._unit_type == UnitType.slot + assert unit_ref._name == "id" + assert unit_ref._role == annotation['role'] + assert unit_ref._group == annotation['group'] + class TestUnitDefBuilder(object): def test_creation(self): with pytest.raises(TypeError): From 41eddc4f4f2449cc000d55e0aa9d0fd6883acf47 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 21 Jun 2021 17:02:35 +0900 Subject: [PATCH 22/24] add test system for rolegroup --- .../generate-all/slotrolegroup.chatette | 25 +++++++++++++++++++ .../generate-all/slotrolegroup.solution | 22 ++++++++++++++++ tests/system-testing/test_system.py | 2 +- 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/system-testing/inputs/generate-all/slotrolegroup.chatette create mode 100644 tests/system-testing/inputs/generate-all/slotrolegroup.solution diff --git a/tests/system-testing/inputs/generate-all/slotrolegroup.chatette b/tests/system-testing/inputs/generate-all/slotrolegroup.chatette new file mode 100644 index 00000000..2900e8ea --- /dev/null +++ b/tests/system-testing/inputs/generate-all/slotrolegroup.chatette @@ -0,0 +1,25 @@ +%[single_role] + @[slot]("role":"role") + +%[single_group] + @[slot]("group":"group") + +%[single_role_group] + @[slot]("role":"role", "group":"group") + +%[multiple_role] + @[slot]("role":"role1") @[slot]("role":"role2") + +%[multiple_group] + @[slot]("group":"group1") @[slot]("group":"group2") + +%[multiple_role_group] + @[slot]("role":"role1", "group":"group1") @[slot]("role":"role2", "group":"group2") + +%[not_role_group] + @[slot]("role":"role" + +@[slot] + slot one + slot1 + diff --git a/tests/system-testing/inputs/generate-all/slotrolegroup.solution b/tests/system-testing/inputs/generate-all/slotrolegroup.solution new file mode 100644 index 00000000..d12aece8 --- /dev/null +++ b/tests/system-testing/inputs/generate-all/slotrolegroup.solution @@ -0,0 +1,22 @@ +# Contains all possible examples for the Chatette template file with the same name. +single_role>>>slot one +single_role>>>slot1 +single_group>>>slot one +single_group>>>slot1 +single_role_group>>>slot one +single_role_group>>>slot1 +multiple_role>>>slot one slot one +multiple_role>>>slot one slot1 +multiple_role>>>slot1 slot one +multiple_role>>>slot1 slot1 +multiple_group>>>slot one slot one +multiple_group>>>slot one slot1 +multiple_group>>>slot1 slot one +multiple_group>>>slot1 slot1 +multiple_role_group>>>slot one slot one +multiple_role_group>>>slot one slot1 +multiple_role_group>>>slot1 slot one +multiple_role_group>>>slot1 slot1 + +not_role_group>>>slot one("role":"role" +not_role_group>>>slot1("role":"role" \ No newline at end of file diff --git a/tests/system-testing/test_system.py b/tests/system-testing/test_system.py index 5a25eefb..7744ec23 100644 --- a/tests/system-testing/test_system.py +++ b/tests/system-testing/test_system.py @@ -157,7 +157,7 @@ def test_generate_all_training(self): input_filenames = [ "simplest.chatette", "only-words.chatette", "words-and-groups.chatette", "alias.chatette", "include.chatette", - "slot.chatette" + "slot.chatette", "slotrolegroup.chatette" ] for filename in input_filenames: file_path = os.path.join(input_dir_path, filename) From 2dfdd0e9706d31837cb4077277d60307218c0787 Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Mon, 21 Jun 2021 17:16:08 +0900 Subject: [PATCH 23/24] added missing quotation to entity text --- chatette/adapters/rasa_md.py | 6 +++--- chatette/adapters/rasa_yml.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chatette/adapters/rasa_md.py b/chatette/adapters/rasa_md.py index cfb36994..a3434b96 100644 --- a/chatette/adapters/rasa_md.py +++ b/chatette/adapters/rasa_md.py @@ -79,11 +79,11 @@ def prepare_example(self, example): entity_annotation_text = ']{"entity": "' + entity.slot_name entity_text = result[entity._start_index:entity._start_index + entity._len] if entity_text != entity.value: - entity_annotation_text += ', "value": "{}'.format(entity.value) + entity_annotation_text += '", "value": "{}'.format(entity.value) if entity.role is not None: - entity_annotation_text += ', "role": "{}'.format(entity.role) + entity_annotation_text += '", "role": "{}'.format(entity.role) if entity.group is not None: - entity_annotation_text += ', "group": "{}'.format(entity.group) + entity_annotation_text += '", "group": "{}'.format(entity.group) result = \ result[:entity._start_index] + "[" + \ entity_text + entity_annotation_text + '"}' + \ diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py index 323a5c57..49709b44 100644 --- a/chatette/adapters/rasa_yml.py +++ b/chatette/adapters/rasa_yml.py @@ -88,11 +88,11 @@ def prepare_example(self, example): entity_annotation_text = ']{"entity": "' + entity.slot_name entity_text = result[entity._start_index:entity._start_index + entity._len] if entity_text != entity.value: - entity_annotation_text += ', "value": "{}'.format(entity.value) + entity_annotation_text += '", "value": "{}'.format(entity.value) if entity.role is not None: - entity_annotation_text += ', "role": "{}'.format(entity.role) + entity_annotation_text += '", "role": "{}'.format(entity.role) if entity.group is not None: - entity_annotation_text += ', "group": "{}'.format(entity.group) + entity_annotation_text += '", "group": "{}'.format(entity.group) result = \ result[:entity._start_index] + "[" + \ entity_text + entity_annotation_text + '"}' + \ From 29f7109633cfa93a6085b07b6807ab1eb9ac276b Mon Sep 17 00:00:00 2001 From: Tommy Gunawan Date: Thu, 12 Aug 2021 18:49:54 +0900 Subject: [PATCH 24/24] changed OrderedDict to ruamel CommentedMap --- chatette/adapters/rasa_yml.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/chatette/adapters/rasa_yml.py b/chatette/adapters/rasa_yml.py index 49709b44..8b5acc07 100644 --- a/chatette/adapters/rasa_yml.py +++ b/chatette/adapters/rasa_yml.py @@ -1,11 +1,10 @@ import os import io -from collections import OrderedDict import ruamel.yaml as yaml from ruamel.yaml.scalarstring import DoubleQuotedScalarString from ruamel.yaml.error import YAMLError from ruamel.yaml.constructor import DuplicateKeyError - +from ruamel.yaml.comments import CommentedMap as OrderedDict from chatette.adapters._base import Adapter from chatette.utils import append_to_list_in_dict, cast_to_unicode