Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Model settings for the 'llama_7b' entry.
# Child keys are nested under `model_config`; left at column 0 they would be
# parsed as unrelated top-level keys and `model_config` itself would be null.
model_config:
  model_name: 'llama_7b'
  max_generate_length: 4096
  end_token: 2
  # List-valued settings; each currently holds a single entry.
  seq_length: [4096]
  vocab_size: 32000
  prefill_batch_size: [32]
  decode_batch_size: [64]
  zactivate_len: [512, 1024, 2048, 4096]
  model_type: 'dyn'
  seq_type: 'static'
  batch_waiting_time: 0.0
  decode_batch_waiting_time: 0.0
  batching_strategy: 'continuous'
  current_index: false
  # NOTE(review): presumably pairs with the `pa_config` section — confirm.
  page_attention: true
  # Kept as a quoted string; the consumer is expected to resolve the name.
  model_dtype: 'DataType.FLOAT32'
  pad_token_id: 0
  backend: 'kbk'  # alternative value noted by the author: 'ge'
  # NOTE(review): absolute, machine-specific path — adjust per deployment.
  model_cfg_path: '/home/ma-user/work/mindformers/configs/llama2/predict_llama2_7b.yaml'

# Serving endpoint settings.
# Child keys are nested so they belong to `serving_config` rather than the
# document root.
serving_config:
  # One port listed; NOTE(review): presumably one entry per agent — confirm.
  agent_ports: [16002]
  start_device_id: 0
  # Loopback address; NOTE(review): change if remote clients must connect.
  server_ip: '127.0.0.1'
  server_port: 8835

# NOTE(review): presumably the settings used when `page_attention` is enabled
# in `model_config` — confirm against the consumer.
# Child keys are nested so they belong to `pa_config`.
pa_config:
  num_blocks: 1024
  block_size: 16
  decode_seq_length: 4096

# Tokenizer selection.
# `type` must be nested here: several sections define a `type` key, and at
# column 0 they would collide as duplicate top-level keys.
tokenizer:
  type: LlamaTokenizer
  # NOTE(review): absolute, machine-specific path — adjust per deployment.
  vocab_file: '/home/ma-user/work/checkpoint_download/llama2/tokenizer.model'

# Names the type used for basic inputs.
# `type` is nested so it does not collide with the `type` keys of the other
# sections at the document root.
basic_inputs:
  type: LlamaBasicInputs

# Names the type used for extra inputs.
# `type` is nested so it does not collide with the `type` keys of the other
# sections at the document root.
extra_inputs:
  type: LlamaExtraInputs

# Names the type used for warmup inputs.
# `type` is nested so it does not collide with the `type` keys of the other
# sections at the document root.
warmup_inputs:
  type: LlamaWarmupInputs
Footer