drizzlezyk committed on
Commit
4327ffc
·
verified ·
1 Parent(s): 572f6b6

Upload inference/runner_config/tp32.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference/runner_config/tp32.yaml +30 -0
inference/runner_config/tp32.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3
+
4
+ model_name: "pangu_ultra_moe"
5
+ model_path: "./model"
6
+ exe_mode: "eager" # ["dynamo", "eager"]
7
+
8
+ model_config:
9
+ tokenizer_mode: default # ["default", "chat"]
10
+ mm_quant_mode: None
11
+ mla_backend: absorb # [native, absorb]
12
+ with_ckpt: 1 # [0, 1]
13
+ enable_profiler: 0 # [0, 1]
14
+
15
+ data_config:
16
+ input_max_len: 4096
17
+ max_new_tokens: 28000
18
+ batch_size: 1
19
+ max_position_embeddings: 32768
20
+
21
+ parallel_config:
22
+ attn_tp_size: 32
23
+ moe_tp_size: 32
24
+ embed_tp_size: 32
25
+
26
+ sampling_config:
27
+ top_n_sigma: 0.05
28
+ top_p: 1.0
29
+ temperature: 0.7
30
+ top_k: -1