1
0
This commit is contained in:
liushuang 2025-10-15 10:30:12 +08:00
parent 89973bfa80
commit fcbe2da11e

View File

@ -51,11 +51,10 @@ vllm serve \
modelscope download --model Qwen/Qwen3-0.6B --local_dir /home/ss/vllm-py12/qwen3-06b modelscope download --model Qwen/Qwen3-0.6B --local_dir /home/ss/vllm-py12/qwen3-06b
# 运行 Qwen3-0.6B # 运行 Qwen3-0.6B
vllm serve \ vllm serve /home/ss/vllm-py12/qwen3-06b \
--host 0.0.0.0 \ --host 0.0.0.0 \
--port 18778 \ --port 8000 \
--model /home/ss/vllm-py12/qwen3-06b \ --served-model-name Qwen3-0.6B \
--served-model-name qwen3-06b \
--tensor-parallel-size 1 \ --tensor-parallel-size 1 \
--dtype auto \ --dtype auto \
--gpu-memory-utilization 0.9 \ --gpu-memory-utilization 0.9 \