-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathterminal_bench_split.json
More file actions
100 lines (100 loc) · 2.39 KB
/
terminal_bench_split.json
File metadata and controls
100 lines (100 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
"seed": 42,
"source": "terminal-bench@2.0 registry (89 tasks)",
"method": "random 30/70 split, seed 42",
"n_test": 27,
"n_train": 62,
"test": [
"bn-fit-modify",
"chess-best-move",
"cobol-modernization",
"count-dataset-tokens",
"distribution-search",
"filter-js-from-html",
"git-leak-recovery",
"git-multibranch",
"install-windows-3.11",
"largest-eigenval",
"log-summary-date-ranges",
"make-mips-interpreter",
"merge-diff-arc-agi-task",
"modernize-scientific-stack",
"mteb-retrieve",
"overfull-hbox",
"password-recovery",
"path-tracing-reverse",
"polyglot-c-py",
"protein-assembly",
"prove-plus-comm",
"pytorch-model-recovery",
"qemu-alpine-ssh",
"regex-chess",
"rstan-to-pystan",
"sqlite-with-gcov",
"tune-mjcf"
],
"train": [
"adaptive-rejection-sampler",
"break-filter-js-from-html",
"build-cython-ext",
"build-pmars",
"build-pov-ray",
"caffe-cifar-10",
"cancel-async-tasks",
"circuit-fibsqrt",
"code-from-image",
"compile-compcert",
"configure-git-webserver",
"constraints-scheduling",
"crack-7z-hash",
"custom-memory-heap-crash",
"db-wal-recovery",
"dna-assembly",
"dna-insert",
"extract-elf",
"extract-moves-from-video",
"feal-differential-cryptanalysis",
"feal-linear-cryptanalysis",
"financial-document-processor",
"fix-code-vulnerability",
"fix-git",
"fix-ocaml-gc",
"gcode-to-text",
"gpt2-codegolf",
"headless-terminal",
"hf-model-inference",
"kv-store-grpc",
"large-scale-text-editing",
"llm-inference-batching-scheduler",
"mailman",
"make-doom-for-mips",
"mcmc-sampling-stan",
"model-extraction-relu-logits",
"mteb-leaderboard",
"multi-source-data-merger",
"nginx-request-logging",
"openssl-selfsigned-cert",
"path-tracing",
"polyglot-rust-c",
"portfolio-optimization",
"pypi-server",
"pytorch-model-cli",
"qemu-startup",
"query-optimize",
"raman-fitting",
"regex-log",
"reshard-c4-data",
"sam-cell-seg",
"sanitize-git-repo",
"schemelike-metacircular-eval",
"sparql-university",
"sqlite-db-truncate",
"torch-pipeline-parallelism",
"torch-tensor-parallelism",
"train-fasttext",
"video-processing",
"vulnerable-secret",
"winning-avg-corewars",
"write-compressor"
]
}