From a1466511771b511058ac4205435d99660bfc8a29 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 19:21:22 +0200 Subject: [PATCH 1/9] Add BISECT --- promptsource/templates.py | 18 +++++++- .../templates/GEM/bisect/en/templates.yaml | 42 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 promptsource/templates/GEM/bisect/en/templates.yaml diff --git a/promptsource/templates.py b/promptsource/templates.py index 2b9eb341e..5e12fabce 100644 --- a/promptsource/templates.py +++ b/promptsource/templates.py @@ -27,7 +27,23 @@ # These are users whose datasets should be included in the results returned by # filter_english_datasets (regardless of their metadata) -INCLUDED_USERS = {"Zaid", "craffel", "GEM", "aps", "khalidalt", "shanya", "rbawden", "BigScienceBiasEval", "gsarti"} +INCLUDED_USERS = { + "Zaid", + "craffel", + "GEM", + "aps", + "khalidalt", + "shanya", + "rbawden", + "BigScienceBiasEval", + "gsarti", + "Helsinki-NLP", + "Muennighoff", + "facebook", + "codeparrot", + "pasinit", + "allenai" +} # These are the metrics with which templates can be tagged METRICS = { diff --git a/promptsource/templates/GEM/bisect/en/templates.yaml b/promptsource/templates/GEM/bisect/en/templates.yaml new file mode 100644 index 000000000..2c9f79dc7 --- /dev/null +++ b/promptsource/templates/GEM/bisect/en/templates.yaml @@ -0,0 +1,42 @@ +dataset: GEM/bisect +subset: en +templates: + 58342608-5cd7-4ce7-b2e1-905ecd7f4c80: !Template + answer_choices: null + id: 58342608-5cd7-4ce7-b2e1-905ecd7f4c80 + jinja: "Split and simplify the following sentence while retaining its full meaning: {{source_sentence}}\nSimplified version: ||| {{target_sentence}}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - BLEU + - ROUGE + original_task: true + name: fullmeaning + reference: '' + 6e41305c-5461-4cf3-853d-8a6fb5747623: !Template + answer_choices: null + id: 6e41305c-5461-4cf3-853d-8a6fb5747623 + jinja: "{{source_sentence}}\nThe above sentence is very complicated. Please provide me a simplified synonymous version consisting of multiple sentences: ||| {{target_sentence}}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - BLEU + - ROUGE + original_task: true + name: synonymous + reference: '' + 626b682e-e801-4e8d-9ac3-5b676c9d3da2: !Template + answer_choices: null + id: 626b682e-e801-4e8d-9ac3-5b676c9d3da2 + jinja: "{{source_sentence}}. This sentence is hard to understand. A simpler version with equivalent meaning is the following: ||| {{target_sentence}}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - ROUGE + - BLEU + original_task: true + name: equimeaning + reference: '' From a37f707d6ba014373e168e51dcc1b3842bda8564 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 19:45:52 +0200 Subject: [PATCH 2/9] Make bisect case sensitive --- promptsource/templates/GEM/bisect/en/templates.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promptsource/templates/GEM/bisect/en/templates.yaml b/promptsource/templates/GEM/bisect/en/templates.yaml index 2c9f79dc7..cb824424b 100644 --- a/promptsource/templates/GEM/bisect/en/templates.yaml +++ b/promptsource/templates/GEM/bisect/en/templates.yaml @@ -1,4 +1,4 @@ -dataset: GEM/bisect +dataset: GEM/BiSECT subset: en templates: 58342608-5cd7-4ce7-b2e1-905ecd7f4c80: !Template From 5729eeccd6daec51e3873c685941c52788676c1e Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 19:52:47 +0200 Subject: [PATCH 3/9] Del tmp --- .../templates/GEM/bisect/en/templates.yaml | 42 ------------------- 1 file changed, 42 deletions(-) delete mode 100644 promptsource/templates/GEM/bisect/en/templates.yaml diff --git a/promptsource/templates/GEM/bisect/en/templates.yaml b/promptsource/templates/GEM/bisect/en/templates.yaml deleted file mode 100644 index cb824424b..000000000 --- a/promptsource/templates/GEM/bisect/en/templates.yaml +++ /dev/null @@ -1,42 +0,0 @@ -dataset: GEM/BiSECT -subset: en -templates: - 58342608-5cd7-4ce7-b2e1-905ecd7f4c80: !Template - answer_choices: null - id: 58342608-5cd7-4ce7-b2e1-905ecd7f4c80 - jinja: "Split and simplify the following sentence while retaining its full meaning: {{source_sentence}}\nSimplified version: ||| {{target_sentence}}" - metadata: !TemplateMetadata - choices_in_prompt: false - languages: [] - metrics: - - BLEU - - ROUGE - original_task: true - name: fullmeaning - reference: '' - 6e41305c-5461-4cf3-853d-8a6fb5747623: !Template - answer_choices: null - id: 6e41305c-5461-4cf3-853d-8a6fb5747623 - jinja: "{{source_sentence}}\nThe above sentence is very complicated. Please provide me a simplified synonymous version consisting of multiple sentences: ||| {{target_sentence}}" - metadata: !TemplateMetadata - choices_in_prompt: false - languages: [] - metrics: - - BLEU - - ROUGE - original_task: true - name: synonymous - reference: '' - 626b682e-e801-4e8d-9ac3-5b676c9d3da2: !Template - answer_choices: null - id: 626b682e-e801-4e8d-9ac3-5b676c9d3da2 - jinja: "{{source_sentence}}. This sentence is hard to understand. A simpler version with equivalent meaning is the following: ||| {{target_sentence}}" - metadata: !TemplateMetadata - choices_in_prompt: false - languages: [] - metrics: - - ROUGE - - BLEU - original_task: true - name: equimeaning - reference: '' From ce04892a1117fee7cdea2b5a7155a1189e1c0c91 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 19:53:56 +0200 Subject: [PATCH 4/9] Case sens? --- promptsource/templates/GEM/BiSECT/templates.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 promptsource/templates/GEM/BiSECT/templates.yaml diff --git a/promptsource/templates/GEM/BiSECT/templates.yaml b/promptsource/templates/GEM/BiSECT/templates.yaml new file mode 100644 index 000000000..e69de29bb From 92e43923109becd047833717603ad98920fda540 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 19:54:46 +0200 Subject: [PATCH 5/9] Readd tmplate --- .../templates/GEM/BiSECT/templates.yaml | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/promptsource/templates/GEM/BiSECT/templates.yaml b/promptsource/templates/GEM/BiSECT/templates.yaml index e69de29bb..cb824424b 100644 --- a/promptsource/templates/GEM/BiSECT/templates.yaml +++ b/promptsource/templates/GEM/BiSECT/templates.yaml @@ -0,0 +1,42 @@ +dataset: GEM/BiSECT +subset: en +templates: + 58342608-5cd7-4ce7-b2e1-905ecd7f4c80: !Template + answer_choices: null + id: 58342608-5cd7-4ce7-b2e1-905ecd7f4c80 + jinja: "Split and simplify the following sentence while retaining its full meaning: {{source_sentence}}\nSimplified version: ||| {{target_sentence}}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - BLEU + - ROUGE + original_task: true + name: fullmeaning + reference: '' + 6e41305c-5461-4cf3-853d-8a6fb5747623: !Template + answer_choices: null + id: 6e41305c-5461-4cf3-853d-8a6fb5747623 + jinja: "{{source_sentence}}\nThe above sentence is very complicated. Please provide me a simplified synonymous version consisting of multiple sentences: ||| {{target_sentence}}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - BLEU + - ROUGE + original_task: true + name: synonymous + reference: '' + 626b682e-e801-4e8d-9ac3-5b676c9d3da2: !Template + answer_choices: null + id: 626b682e-e801-4e8d-9ac3-5b676c9d3da2 + jinja: "{{source_sentence}}. This sentence is hard to understand. A simpler version with equivalent meaning is the following: ||| {{target_sentence}}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - ROUGE + - BLEU + original_task: true + name: equimeaning + reference: '' From 40dc17f27bee365df3958d82aac1961b88367146 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 19:57:19 +0200 Subject: [PATCH 6/9] Line length --- promptsource/templates/GEM/BiSECT/templates.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/promptsource/templates/GEM/BiSECT/templates.yaml b/promptsource/templates/GEM/BiSECT/templates.yaml index cb824424b..b3f6546ad 100644 --- a/promptsource/templates/GEM/BiSECT/templates.yaml +++ b/promptsource/templates/GEM/BiSECT/templates.yaml @@ -4,7 +4,8 @@ templates: 58342608-5cd7-4ce7-b2e1-905ecd7f4c80: !Template answer_choices: null id: 58342608-5cd7-4ce7-b2e1-905ecd7f4c80 - jinja: "Split and simplify the following sentence while retaining its full meaning: {{source_sentence}}\nSimplified version: ||| {{target_sentence}}" + jinja: "Split and simplify the following sentence while retaining its full meaning: + {{source_sentence}}\n Simplified version: ||| {{target_sentence}}" metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -17,7 +18,8 @@ templates: 6e41305c-5461-4cf3-853d-8a6fb5747623: !Template answer_choices: null id: 6e41305c-5461-4cf3-853d-8a6fb5747623 - jinja: "{{source_sentence}}\nThe above sentence is very complicated. Please provide me a simplified synonymous version consisting of multiple sentences: ||| {{target_sentence}}" + jinja: "{{source_sentence}}\nThe above sentence is very complicated. + Please provide me a simplified synonymous version consisting of multiple sentences: ||| {{target_sentence}}" metadata: !TemplateMetadata choices_in_prompt: false languages: [] From 9ccec251b6f53a629c146975a2bff71352d65bc5 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 20:02:35 +0200 Subject: [PATCH 7/9] Reduce line len --- promptsource/templates/GEM/BiSECT/templates.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/promptsource/templates/GEM/BiSECT/templates.yaml b/promptsource/templates/GEM/BiSECT/templates.yaml index b3f6546ad..ff5ac3de6 100644 --- a/promptsource/templates/GEM/BiSECT/templates.yaml +++ b/promptsource/templates/GEM/BiSECT/templates.yaml @@ -32,7 +32,8 @@ templates: 626b682e-e801-4e8d-9ac3-5b676c9d3da2: !Template answer_choices: null id: 626b682e-e801-4e8d-9ac3-5b676c9d3da2 - jinja: "{{source_sentence}}. This sentence is hard to understand. A simpler version with equivalent meaning is the following: ||| {{target_sentence}}" + jinja: "{{source_sentence}}. This sentence is hard to understand. + A simpler version with equivalent meaning is the following: ||| {{target_sentence}}" metadata: !TemplateMetadata choices_in_prompt: false languages: [] From 8534afad12f299e6faca0dba9c229eb31233a2de Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 20:58:07 +0200 Subject: [PATCH 8/9] Add subset --- promptsource/templates/GEM/BiSECT/{ => en}/templates.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename promptsource/templates/GEM/BiSECT/{ => en}/templates.yaml (100%) diff --git a/promptsource/templates/GEM/BiSECT/templates.yaml b/promptsource/templates/GEM/BiSECT/en/templates.yaml similarity index 100% rename from promptsource/templates/GEM/BiSECT/templates.yaml rename to promptsource/templates/GEM/BiSECT/en/templates.yaml From a2adff160176df2c5a316f73c792d02f9a2a05ff Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 21:05:34 +0200 Subject: [PATCH 9/9] Fix col names --- promptsource/templates/GEM/BiSECT/en/templates.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/promptsource/templates/GEM/BiSECT/en/templates.yaml b/promptsource/templates/GEM/BiSECT/en/templates.yaml index ff5ac3de6..b5c81b170 100644 --- a/promptsource/templates/GEM/BiSECT/en/templates.yaml +++ b/promptsource/templates/GEM/BiSECT/en/templates.yaml @@ -5,7 +5,7 @@ templates: answer_choices: null id: 58342608-5cd7-4ce7-b2e1-905ecd7f4c80 jinja: "Split and simplify the following sentence while retaining its full meaning: - {{source_sentence}}\n Simplified version: ||| {{target_sentence}}" + {{source}}\n Simplified version: ||| {{target}}" metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -18,8 +18,8 @@ templates: 6e41305c-5461-4cf3-853d-8a6fb5747623: !Template answer_choices: null id: 6e41305c-5461-4cf3-853d-8a6fb5747623 - jinja: "{{source_sentence}}\nThe above sentence is very complicated. - Please provide me a simplified synonymous version consisting of multiple sentences: ||| {{target_sentence}}" + jinja: "{{source}}\nThe above sentence is very complicated. + Please provide me a simplified synonymous version consisting of multiple sentences: ||| {{target}}" metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -32,8 +32,8 @@ templates: 626b682e-e801-4e8d-9ac3-5b676c9d3da2: !Template answer_choices: null id: 626b682e-e801-4e8d-9ac3-5b676c9d3da2 - jinja: "{{source_sentence}}. This sentence is hard to understand. - A simpler version with equivalent meaning is the following: ||| {{target_sentence}}" + jinja: "{{source}}. This sentence is hard to understand. + A simpler version with equivalent meaning is the following: ||| {{target}}" metadata: !TemplateMetadata choices_in_prompt: false languages: []