From 2a088245b117a18d9fc69dc0f5beffc3e6c14970 Mon Sep 17 00:00:00 2001
From: guangyusong <15316444+guangyusong@users.noreply.github.com>
Date: Mon, 5 Jun 2023 02:01:15 -0400
Subject: [PATCH 1/2] Add RWKV models

---
 README.md                               |  1 +
 codetf/configs/inference/causal_lm.yaml | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/README.md b/README.md
index 87b9554..063af3d 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,7 @@ The following table shows the supported models with sizes and the tasks that the
 | GPT-NeoX | 20B | Pretrained |
 | GPT-Neo | 1.3B | Pretrained |
 | GPT-J | 6B | Pretrained |
+| RWKV | 169M, 430M, 1.5B, 3B, 7B, 14B 14B | Pretrained |
 | Incoder | 6B | Pretrained |
 | CodeParrot | Small-python (110M), Small-multi(110M), 1.5B | Pretrained |
 | CodeBERT | CodeBERT-base, UnixCoder-base, CodeBERTa-small | Pretrained |
diff --git a/codetf/configs/inference/causal_lm.yaml b/codetf/configs/inference/causal_lm.yaml
index 201baad..0427df3 100644
--- a/codetf/configs/inference/causal_lm.yaml
+++ b/codetf/configs/inference/causal_lm.yaml
@@ -68,4 +68,28 @@ causallm-codegen2-7B-pretrained:
 causallm-codegen2-16B-pretrained:
   huggingface_url: "Salesforce/codegen2-16B"
   tokenizer_url: "Salesforce/codegen2-16B"
+  max_prediction_length: 512
+causallm-rwkv-169M-pretrained:
+  huggingface_url: "RWKV/rwkv-4-169m-pile"
+  tokenizer_url: "RWKV/rwkv-4-169m-pile"
+  max_prediction_length: 512
+causallm-rwkv-430M-pretrained:
+  huggingface_url: "RWKV/rwkv-4-430m-pile"
+  tokenizer_url: "RWKV/rwkv-4-430m-pile"
+  max_prediction_length: 512
+causallm-rwkv-1.5B-pretrained:
+  huggingface_url: "RWKV/rwkv-raven-1b5"
+  tokenizer_url: "RWKV/rwkv-raven-1b5"
+  max_prediction_length: 512
+causallm-rwkv-3B-pretrained:
+  huggingface_url: "RWKV/rwkv-raven-3b"
+  tokenizer_url: "RWKV/rwkv-raven-3b"
+  max_prediction_length: 512
+causallm-rwkv-7B-pretrained:
+  huggingface_url: "RWKV/rwkv-raven-7b"
+  tokenizer_url: "RWKV/rwkv-raven-7b"
+  max_prediction_length: 512
+causallm-rwkv-14B-pretrained:
+  huggingface_url: "RWKV/rwkv-raven-14b"
+  tokenizer_url: "RWKV/rwkv-raven-14b"
   max_prediction_length: 512
\ No newline at end of file

From 5bb10173c12b99592a896ab47fac42f54bce38ab Mon Sep 17 00:00:00 2001
From: guangyusong <15316444+guangyusong@users.noreply.github.com>
Date: Mon, 5 Jun 2023 02:30:50 -0400
Subject: [PATCH 2/2] Update README.md

---
 README.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 063af3d..14bdd26 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ The following table shows the supported models with sizes and the tasks that the
 | GPT-NeoX | 20B | Pretrained |
 | GPT-Neo | 1.3B | Pretrained |
 | GPT-J | 6B | Pretrained |
-| RWKV | 169M, 430M, 1.5B, 3B, 7B, 14B 14B | Pretrained |
+| RWKV | 169M, 430M, 1.5B, 3B, 7B, 14B | Pretrained |
 | Incoder | 6B | Pretrained |
 | CodeParrot | Small-python (110M), Small-multi(110M), 1.5B | Pretrained |
 | CodeBERT | CodeBERT-base, UnixCoder-base, CodeBERTa-small | Pretrained |
@@ -158,6 +158,12 @@ print(model_zoo)
 # codegen2-3.7B pretrained
 # codegen2-7B pretrained
 # codegen2-16B pretrained
+# rwkv-169M pretrained
+# rwkv-430M pretrained
+# rwkv-1.5B pretrained
+# rwkv-3B pretrained
+# rwkv-7B pretrained
+# rwkv-14B pretrained
 # codet5 base-multi-sum pretrained
 # base nl2code
 # base refine
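
Each new `causallm-rwkv-*-pretrained` entry maps a CodeTF model key to a Hugging Face checkpoint (`huggingface_url`/`tokenizer_url`) with a 512-token prediction cap. As a minimal sketch of what those checkpoints resolve to, the snippet below loads the smallest one directly with the `transformers` library; the checkpoint ID comes from the config above, while the prompt and generation settings are illustrative and not part of CodeTF.

```python
# Sketch only: load the RWKV checkpoint referenced by causallm-rwkv-169M-pretrained
# and run a short generation. Assumes a transformers version with RWKV support
# (v4.29+); the prompt and max_new_tokens value are illustrative choices.
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "RWKV/rwkv-4-169m-pile"  # huggingface_url from the config entry above

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# Encode a prompt and generate; the config's max_prediction_length (512) would be
# the upper bound CodeTF applies, here we use a smaller illustrative limit.
inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```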