1+ {
2+ "CodeLlama-34B-Instruct" : {
3+ "link" : " https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf" ,
4+ "open-data" : " None" ,
5+ "pass@1" : {
6+ "instruct" : null ,
7+ "complete" : 38.73
8+ },
9+ "prompted" : true ,
10+ "size" : 34 ,
11+ "direct_complete" : false ,
12+ "lazy" : false ,
13+ "elo_mle" : 942
14+ },
15+ "Meta-Llama-3-70B" : {
16+ "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B" ,
17+ "open-data" : " None" ,
18+ "pass@1" : {
19+ "instruct" : null ,
20+ "complete" : 48.98
21+ },
22+ "prompted" : false ,
23+ "size" : 70 ,
24+ "direct_complete" : false ,
25+ "lazy" : false ,
26+ "elo_mle" : 874
27+ },
28+ "Meta-Llama-3-70B-Instruct" : {
29+ "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" ,
30+ "open-data" : " None" ,
31+ "pass@1" : {
32+ "instruct" : null ,
33+ "complete" : 62.45
34+ },
35+ "prompted" : true ,
36+ "size" : 70 ,
37+ "direct_complete" : false ,
38+ "lazy" : false ,
39+ "elo_mle" : 874
40+ },
41+ "Meta-Llama-3.1-70B-Instruct" : {
42+ "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct" ,
43+ "open-data" : " None" ,
44+ "pass@1" : {
45+ "instruct" : null ,
46+ "complete" : 60
47+ },
48+ "prompted" : true ,
49+ "size" : 70 ,
50+ "direct_complete" : false ,
51+ "lazy" : false ,
52+ "elo_mle" : 874
53+ },
54+ "Meta-Llama-3.1-70B" : {
55+ "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B" ,
56+ "open-data" : " None" ,
57+ "pass@1" : {
58+ "instruct" : null ,
59+ "complete" : 37.56
60+ },
61+ "prompted" : false ,
62+ "size" : 70 ,
63+ "direct_complete" : false ,
64+ "lazy" : false ,
65+ "elo_mle" : 874
66+ },
67+ "Mistral-7B-Instruct-v0.3" : {
68+ "link" : " https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" ,
69+ "open-data" : " None" ,
70+ "pass@1" : {
71+ "instruct" : null ,
72+ "complete" : 43.33
73+ },
74+ "prompted" : true ,
75+ "size" : 7 ,
76+ "direct_complete" : false ,
77+ "lazy" : false ,
78+ "elo_mle" : 874
79+ },
80+ "Mixtral-8x7B-Instruct-v0.1" : {
81+ "link" : " https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1" ,
82+ "open-data" : " None" ,
83+ "pass@1" : {
84+ "instruct" : null ,
85+ "complete" : 42.96
86+ },
87+ "prompted" : true ,
88+ "size" : 46.7 ,
89+ "direct_complete" : false ,
90+ "lazy" : false ,
91+ "elo_mle" : 874
92+ },
93+ "Codestral-22B-v0.1" : {
94+ "link" : " https://huggingface.co/mistralai/Codestral-22B-v0.1" ,
95+ "open-data" : " None" ,
96+ "pass@1" : {
97+ "instruct" : null ,
98+ "complete" : 47.6
99+ },
100+ "prompted" : true ,
101+ "size" : 22 ,
102+ "direct_complete" : false ,
103+ "lazy" : false ,
104+ "elo_mle" : 874
105+ },
106+ "Phi-3-medium-128k-instruct" : {
107+ "link" : " https://huggingface.co/microsoft/Phi-3-medium-128k-instruct" ,
108+ "open-data" : " None" ,
109+ "pass@1" : {
110+ "instruct" : null ,
111+ "complete" : 48.03
112+ },
113+ "prompted" : true ,
114+ "size" : 14 ,
115+ "direct_complete" : false ,
116+ "lazy" : false ,
117+ "elo_mle" : 874
118+ },
119+ "Phi-3-mini-128k-instruct" : {
120+ "link" : " https://huggingface.co/microsoft/Phi-3-mini-128k-instruct" ,
121+ "open-data" : " None" ,
122+ "pass@1" : {
123+ "instruct" : null ,
124+ "complete" : 37.93
125+ },
126+ "prompted" : true ,
127+ "size" : 3.8 ,
128+ "direct_complete" : false ,
129+ "lazy" : false ,
130+ "elo_mle" : 874
131+ },
132+ "Qwen2-57B-A14B-Instruct" : {
133+ "link" : " https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct" ,
134+ "open-data" : " None" ,
135+ "pass@1" : {
136+ "instruct" : null ,
137+ "complete" : 46.34
138+ },
139+ "prompted" : true ,
140+ "size" : 57 ,
141+ "direct_complete" : false ,
142+ "lazy" : false ,
143+ "elo_mle" : 874
144+ },
145+ "CodeQwen1.5-7B-Chat" : {
146+ "link" : " https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat" ,
147+ "open-data" : " None" ,
148+ "pass@1" : {
149+ "instruct" : null ,
150+ "complete" : 49.82
151+ },
152+ "prompted" : true ,
153+ "size" : 7 ,
154+ "direct_complete" : false ,
155+ "lazy" : false ,
156+ "elo_mle" : 874
157+ },
158+ "Yi-1.5-34B-Chat" : {
159+ "link" : " https://huggingface.co/01-ai/Yi-1.5-34B-Chat" ,
160+ "open-data" : " None" ,
161+ "pass@1" : {
162+ "instruct" : null ,
163+ "complete" : 49.39
164+ },
165+ "prompted" : true ,
166+ "size" : 34 ,
167+ "direct_complete" : false ,
168+ "lazy" : false ,
169+ "elo_mle" : 874
170+ },
171+ "Yi-1.5-9B-Chat" : {
172+ "link" : " https://huggingface.co/01-ai/Yi-1.5-9B-Chat" ,
173+ "open-data" : " None" ,
174+ "pass@1" : {
175+ "instruct" : null ,
176+ "complete" : 47.23
177+ },
178+ "prompted" : true ,
179+ "size" : 9 ,
180+ "direct_complete" : false ,
181+ "lazy" : false ,
182+ "elo_mle" : 874
183+ },
184+ "DeepSeek-coder-7b-instruct-v1.5" : {
185+ "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-7b-instruct-v1.5" ,
186+ "open-data" : " None" ,
187+ "pass@1" : {
188+ "instruct" : null ,
189+ "complete" : 41.21
190+ },
191+ "prompted" : true ,
192+ "size" : 7 ,
193+ "direct_complete" : false ,
194+ "lazy" : false ,
195+ "elo_mle" : 874
196+ },
197+ "DeepSeek-coder-33b-instruct" : {
198+ "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct" ,
199+ "open-data" : " None" ,
200+ "pass@1" : {
201+ "instruct" : null ,
202+ "complete" : 36.6
203+ },
204+ "prompted" : true ,
205+ "size" : 33 ,
206+ "direct_complete" : false ,
207+ "lazy" : false ,
208+ "elo_mle" : 874
209+ },
210+ "DeepSeek-moe-16b-chat" : {
211+ "link" : " https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat" ,
212+ "open-data" : " None" ,
213+ "pass@1" : {
214+ "instruct" : null ,
215+ "complete" : 31.01
216+ },
217+ "prompted" : true ,
218+ "size" : 16.4 ,
219+ "direct_complete" : false ,
220+ "lazy" : false ,
221+ "elo_mle" : 874
222+ },
223+ "DeepSeek-Coder-V2-Lite-Instruct" : {
224+ "link" : " https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" ,
225+ "open-data" : " None" ,
226+ "pass@1" : {
227+ "instruct" : null ,
228+ "complete" : 46.51
229+ },
230+ "prompted" : true ,
231+ "size" : 16 ,
232+ "direct_complete" : false ,
233+ "lazy" : false ,
234+ "elo_mle" : 874
235+ },
236+ "InternLM2-5-20b-chat" : {
237+ "link" : " https://huggingface.co/internlm/internlm2_5-20b-chat" ,
238+ "open-data" : " None" ,
239+ "pass@1" : {
240+ "instruct" : null ,
241+ "complete" : 44.89
242+ },
243+ "prompted" : true ,
244+ "size" : 20 ,
245+ "direct_complete" : false ,
246+ "lazy" : false ,
247+ "elo_mle" : 874
248+ },
249+ "StarCoder2-15b-instruct-v0.1" : {
250+ "link" : " https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1" ,
251+ "open-data" : " None" ,
252+ "pass@1" : {
253+ "instruct" : null ,
254+ "complete" : 47.94
255+ },
256+ "prompted" : true ,
257+ "size" : 15 ,
258+ "direct_complete" : false ,
259+ "lazy" : false ,
260+ "elo_mle" : 874
261+ },
262+ "Claude-3-sonnet@20240229" : {
263+ "link" : " " ,
264+ "open-data" : " None" ,
265+ "pass@1" : {
266+ "instruct" : null ,
267+ "complete" : 53.97
268+ },
269+ "prompted" : true ,
270+ "size" : null ,
271+ "direct_complete" : false ,
272+ "lazy" : false ,
273+ "elo_mle" : 874
274+ },
275+ "GPT-4o-2024-05-13" : {
276+ "link" : " " ,
277+ "open-data" : " None" ,
278+ "pass@1" : {
279+ "instruct" : null ,
280+ "complete" : 67
281+ },
282+ "prompted" : true ,
283+ "size" : null ,
284+ "direct_complete" : false ,
285+ "lazy" : false ,
286+ "elo_mle" : 874
287+ },
288+ "GPT-3.5-turbo-0613" : {
289+ "link" : " " ,
290+ "open-data" : " None" ,
291+ "pass@1" : {
292+ "instruct" : null ,
293+ "complete" : 51.7
294+ },
295+ "prompted" : true ,
296+ "size" : null ,
297+ "direct_complete" : false ,
298+ "lazy" : false ,
299+ "elo_mle" : 874
300+ }
301+ }
0 commit comments