Commit d145e87

[None][chore] Update disagg benchmark configs (#8289)
Signed-off-by: Xianjie <5410381+qiaoxj07@users.noreply.github.com>
Signed-off-by: Xianjie Qiao <5410381+qiaoxj07@users.noreply.github.com>
1 parent: d882c92 · commit: d145e87

File tree: 2 files changed (+11, -6 lines changed)

examples/disaggregated/slurm/benchmark/gen_worker_config.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -92,7 +92,8 @@ def gen_config_file(work_dir: str,
         },
         'tensor_parallel_size': gen_tp_size,
         'moe_expert_parallel_size': gen_tp_size,
-        'enable_attention_dp': True if gen_enable_attention_dp else False,
+        'enable_attention_dp': gen_enable_attention_dp,
+        'enable_lm_head_tp_in_adp': gen_enable_attention_dp and mtp_size > 0,
         'pipeline_parallel_size': gen_pp_size,
         'max_batch_size': gen_batch_size,
         'max_num_tokens': gen_max_num_tokens,
@@ -109,6 +110,7 @@ def gen_config_file(work_dir: str,
         },
         'moe_config': {
             'backend': gen_moe_backend,
+            'use_low_precision_moe_combine': True,
         },
         'cache_transceiver_config': {
             'max_tokens_in_buffer': cache_transceiver_max_num_tokens,
```
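
For readers skimming the change: below is a minimal, hypothetical Python sketch of the generation-worker fields this commit touches, reduced from `gen_config_file`. The surrounding keys and variable names come from the diff context above; the helper name and the argument values in the usage line are invented for illustration.

```python
# Hypothetical reduced sketch of gen_config_file: only the generation-worker
# fields touched by this commit, plus the surrounding keys from the diff
# context. The real function builds a much larger dict and writes it as YAML.
def gen_worker_options(gen_tp_size: int,
                       gen_pp_size: int,
                       gen_enable_attention_dp: bool,
                       mtp_size: int,
                       gen_moe_backend: str) -> dict:
    return {
        'tensor_parallel_size': gen_tp_size,
        'moe_expert_parallel_size': gen_tp_size,
        # The redundant `True if ... else False` collapses to the flag itself.
        'enable_attention_dp': gen_enable_attention_dp,
        # New: LM-head tensor parallelism under attention DP, enabled only
        # when MTP speculative decoding is in use (mtp_size > 0).
        'enable_lm_head_tp_in_adp': gen_enable_attention_dp and mtp_size > 0,
        'pipeline_parallel_size': gen_pp_size,
        'moe_config': {
            'backend': gen_moe_backend,
            # New: request the low-precision MoE combine path.
            'use_low_precision_moe_combine': True,
        },
    }

# Usage with made-up values:
print(gen_worker_options(gen_tp_size=8, gen_pp_size=1,
                         gen_enable_attention_dp=True,
                         mtp_size=3, gen_moe_backend='WIDEEP'))
```

The net effect, per the diff: `enable_lm_head_tp_in_adp` is switched on only when attention DP and MTP speculation are both active, and the MoE config now always requests the low-precision combine.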

examples/disaggregated/slurm/benchmark/start_worker.sh

Lines changed: 8 additions & 5 deletions
```diff
@@ -19,6 +19,8 @@ echo "enable_pdl: ${enable_pdl}, work_dir: ${work_dir}"
 echo "SLURM_PROCID: ${SLURM_PROCID}, hostname: $(hostname), instance_id: ${instance_id}"
 
 export TLLM_LOG_LEVEL=INFO
+export TRTLLM_SERVER_DISABLE_GC=1
+export TRTLLM_WORKER_DISABLE_GC=1
 
 if [ "${enable_pdl}" = "true" ]; then
     export TRTLLM_ENABLE_PDL=1
@@ -62,15 +64,16 @@ if [ "${enable_nsys}" != "true" ]; then
     trtllm-llmapi-launch ${numa_bind_cmd} trtllm-serve ${model_path} --host $(hostname) --port ${port} --extra_llm_api_options ${config_file}
 else
     nsys_prefix=""
-    nsys_file=${work_dir}/nsys_worker_proc_${instance_id}_${SLURM_PROCID}
+    nsys_file=${work_dir}/nsys_worker_proc_${role}_${instance_id}_${SLURM_PROCID}
     export TLLM_PROFILE_RECORD_GC=1
     export TLLM_NVTX_DEBUG=1
-    if [ "${role}" = "GEN" ] && [ "$SLURM_PROCID" = "0" ]; then
+    nsys_prefix="nsys profile -e \"NSYS_MPI_STORE_TEAMS_PER_RANK=1\" -o ${nsys_file} -f true -t cuda,nvtx,python-gil -c cudaProfilerApi --cuda-graph-trace node --capture-range-end=stop --gpu-metrics-devices=none"
+    if [ "${role}" = "GEN" ]; then
         export TLLM_PROFILE_START_STOP=200-250
-        nsys_prefix="nsys profile -e \"NSYS_MPI_STORE_TEAMS_PER_RANK=1\" -o ${nsys_file} -f true -t cuda,nvtx,python-gil -c cudaProfilerApi --cuda-graph-trace node --capture-range-end=stop --gpu-metrics-devices=none"
-        echo "nsys_prefix: ${nsys_prefix}"
+        echo "nsys is enabled on gen_gpus"
     elif [ "${role}" = "CTX" ]; then
-        echo "nsys is not enabled on ctx_gpus"
+        export TLLM_PROFILE_START_STOP=10-30
+        echo "nsys is enabled on ctx_gpus"
     fi
     ${nsys_prefix} trtllm-llmapi-launch ${numa_bind_cmd} \
         trtllm-serve ${model_path} \
```
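
For clarity, here is a small Python re-expression of the role-based profiling logic the script now implements. It is a sketch only: the function and parameter names are mine, while the nsys flag string and capture windows are copied from the diff above.

```python
# Illustrative Python re-expression of the nsys setup in start_worker.sh;
# names are invented, flag strings and windows are copied from the diff.
import os


def nsys_setup(role: str, work_dir: str, instance_id: str, proc_id: str) -> str:
    """Return the nsys command prefix and set the per-role capture window."""
    # The report name now embeds the role, so CTX and GEN workers that share
    # a work_dir, instance_id, and rank no longer overwrite each other.
    nsys_file = f"{work_dir}/nsys_worker_proc_{role}_{instance_id}_{proc_id}"
    prefix = (
        'nsys profile -e "NSYS_MPI_STORE_TEAMS_PER_RANK=1" '
        f"-o {nsys_file} -f true -t cuda,nvtx,python-gil -c cudaProfilerApi "
        "--cuda-graph-trace node --capture-range-end=stop "
        "--gpu-metrics-devices=none"
    )
    # Both roles are now profiled, each over its own iteration window;
    # previously only GEN rank 0 built a prefix at all.
    if role == "GEN":
        os.environ["TLLM_PROFILE_START_STOP"] = "200-250"
    elif role == "CTX":
        os.environ["TLLM_PROFILE_START_STOP"] = "10-30"
    return prefix


# Usage with made-up arguments:
print(nsys_setup("CTX", "/tmp/work", "0", "0"))
```

Compared to the previous version, the prefix is built for every rank of both roles rather than only GEN rank 0, and CTX workers get their own, earlier capture window (iterations 10-30) instead of being skipped.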
