From 93a8885d923b9f5d7a60d0dd3bfc367c956454be Mon Sep 17 00:00:00 2001 From: KnightLancelot Date: Wed, 4 Sep 2024 15:54:31 +0800 Subject: [PATCH] =?UTF-8?q?=20=20=20=20=20=20=20=20#=20=E8=A7=82=E5=AF=9F?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=90=8E=EF=BC=8C=E5=8F=91=E7=8E=B0=E4=B8=8B?= =?UTF-8?q?=E9=9D=A2=E7=9A=84=E4=BB=A3=E7=A0=81=E4=BC=9A=E8=BF=87=E6=BB=A4?= =?UTF-8?q?=E6=8E=89=E4=B8=80=E4=BA=9B=E6=B2=A1=E6=9C=89=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E7=9A=84=E6=95=B0=E6=8D=AE=EF=BC=8C=E5=A6=82=EF=BC=9Asure,=20h?= =?UTF-8?q?ere=20are=20some=20tools=20and=20technologies=20that=20can=20be?= =?UTF-8?q?=20used=20to=20implement=20the=20interactive=20elements=20on=20?= =?UTF-8?q?this=20website:\n\n1.=20infographics=20and=20videos:\n-=20canva?= =?UTF-8?q?:=20a=20design=20tool=20that=20helps=20create=20visually=20appe?= =?UTF-8?q?aling=20graphics=20and=20infographics.\n-=20adobe=20creative=20?= =?UTF-8?q?suite:=20a=20suite=20of=20creative=20tools=20that=20includes=20?= =?UTF-8?q?photoshop,=20illustrator,=20and=20premiere=20pro=20for=20creati?= =?UTF-8?q?ng=20graphics=20and=20videos.\n-=20animoto:=20a=20video=20creat?= =?UTF-8?q?ion=20tool=20that=20allows=20users=20to=20create=20professional?= =?UTF-8?q?-looking=20videos=20with=20ease.\n\n2.=20quizzes=20and=20survey?= =?UTF-8?q?s:\n-=20surveymonkey:=20a=20popular=20survey=20tool=20that=20al?= =?UTF-8?q?lows=20users=20to=20create=20and=20distribute=20surveys.\n-=20g?= =?UTF-8?q?oogle=20forms:=20a=20free=20tool=20that=20allows=20users=20to?= =?UTF-8?q?=20create=20surveys=20and=20quizzes=20easily.\n\n3.=20chatbots:?= =?UTF-8?q?\n-=20dialogflow:=20a=20natural=20language=20processing=20platf?= =?UTF-8?q?orm=20that=20allows=20users=20to=20create=20chatbots.\n-=20ibm?= =?UTF-8?q?=20watson=20assistant:=20a=20chatbot=20tool=20that=20uses=20ai?= =?UTF-8?q?=20to=20understand=20and=20respond=20to=20user=20queries.\n\n4.?= =?UTF-8?q?=20interactive=20timelines:\n-=20tiki-toki:=20an=20online=20tim?= =?UTF-8?q?eline=20creation=20tool=20that=20allows=20users=20to=20create?= =?UTF-8?q?=20interactive=20timelines.\n-=20timelinejs:=20a=20free,=20open?= =?UTF-8?q?-source=20tool=20that=20allows=20users=20to=20create=20interact?= =?UTF-8?q?ive=20timelines=20using=20google=20sheets.\n\n5.=20visuals:\n-?= =?UTF-8?q?=20unsplash:=20a=20platform=20that=20provides=20free,=20high-qu?= =?UTF-8?q?ality=20images.\n-=20pexels:=20a=20platform=20that=20provides?= =?UTF-8?q?=20free=20stock=20photos=20and=20videos.\n\nthese=20tools=20and?= =?UTF-8?q?=20technologies=20can=20help=20create=20engaging=20and=20intera?= =?UTF-8?q?ctive=20elements=20on=20the=20website=20and=20enhance=20the=20u?= =?UTF-8?q?ser=20experience.=20=20=20=20=20=20=20=20=20#=20=E6=89=80?= =?UTF-8?q?=E4=BB=A5=E8=BF=99=E9=87=8C=E5=B0=86"if=20'moss'=20in=20value?= =?UTF-8?q?=20or=20'human:'=20in=20value=20or=20'assistant:'=20in=20value?= =?UTF-8?q?=20or=20'user:'=20in=20value:"=20=20=20=20=20=20=20=20=20#=20?= =?UTF-8?q?=E6=94=B9=E4=B8=BA"if=20'moss'=20in=20value=20or=20'human:'=20i?= =?UTF-8?q?n=20value=20or=20'\nassistant:'=20in=20value=20or=20(=20'human:?= =?UTF-8?q?'=20in=20value=20and=20'assistant:'=20in=20value=20)=20or=20(?= =?UTF-8?q?=20'user:'=20in=20value=20and=20'assistant:'=20in=20value=20):"?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- swift/llm/utils/dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/swift/llm/utils/dataset.py b/swift/llm/utils/dataset.py index fc27c35eb6..7fb935ec44 100644 --- a/swift/llm/utils/dataset.py +++ b/swift/llm/utils/dataset.py @@ -916,7 +916,10 @@ def _repair_ms_bench(conversations: str) -> Optional[List[Dict[str, str]]]: # skip MOSS for c in conversations: value = c['value'].lower() - if 'moss' in value or 'human:' in value or 'assistant:' in value or 'user:' in value: + # 观察数据后,发现下面的代码会过滤掉一些没有问题的数据,如:sure, here are some tools and technologies that can be used to implement the interactive elements on this website:\n\n1. infographics and videos:\n- canva: a design tool that helps create visually appealing graphics and infographics.\n- adobe creative suite: a suite of creative tools that includes photoshop, illustrator, and premiere pro for creating graphics and videos.\n- animoto: a video creation tool that allows users to create professional-looking videos with ease.\n\n2. quizzes and surveys:\n- surveymonkey: a popular survey tool that allows users to create and distribute surveys.\n- google forms: a free tool that allows users to create surveys and quizzes easily.\n\n3. chatbots:\n- dialogflow: a natural language processing platform that allows users to create chatbots.\n- ibm watson assistant: a chatbot tool that uses ai to understand and respond to user queries.\n\n4. interactive timelines:\n- tiki-toki: an online timeline creation tool that allows users to create interactive timelines.\n- timelinejs: a free, open-source tool that allows users to create interactive timelines using google sheets.\n\n5. visuals:\n- unsplash: a platform that provides free, high-quality images.\n- pexels: a platform that provides free stock photos and videos.\n\nthese tools and technologies can help create engaging and interactive elements on the website and enhance the user experience. + # 所以这里将"if 'moss' in value or 'human:' in value or 'assistant:' in value or 'user:' in value:" + # 改为"if 'moss' in value or 'human:' in value or '\nassistant:' in value or ( 'human:' in value and 'assistant:' in value ) or ( 'user:' in value and 'assistant:' in value ):"。 + if 'moss' in value or 'human:' in value or '\nassistant:' in value or ( 'human:' in value and 'assistant:' in value ) or ( 'user:' in value and 'assistant:' in value ): return return conversations