From 37c6857458e1db82453e537ba5ceeb560b0d82ee Mon Sep 17 00:00:00 2001
From: 8ga
Date: Fri, 17 Oct 2025 16:36:45 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20202510=5FRTX4090=E7=AC=94?=
 =?UTF-8?q?=E7=94=B5=E6=93=8D=E4=BD=9C=E8=AE=B0=E5=BD=95.md?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 202510_RTX4090笔电操作记录.md | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/202510_RTX4090笔电操作记录.md b/202510_RTX4090笔电操作记录.md
index a3a0409..be43114 100644
--- a/202510_RTX4090笔电操作记录.md
+++ b/202510_RTX4090笔电操作记录.md
@@ -32,24 +32,14 @@ pip install vllm -i http://mirrors.cloud.tencent.com/pypi/simple --extra-index-u
 # 安装 modelscope
 pip install modelscope -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
 
-# 拉取 gpt-oss-20b 模型
+# 拉取 gpt-oss-20b 模型,由于显存不足,运行失败了
 modelscope download --model openai-mirror/gpt-oss-20b --local_dir /home/ss/vllm-py12/gpt-oss-20b
 
-# 运行 gpt-oss-20b 模型失败,移动端的 RTX4090 只有 16GB 显存,至少需要 16~24GB 显存
-vllm serve \
- /home/ss/vllm-py12/gpt-oss-20b \
- --port 18777 \
- --api-key token_lcfc \
- --served-model-name gpt-oss-20b \
- --gpu-memory-utilization 0.95 \
- --tool-call-parser openai \
- --enable-auto-tool-choice
-
-# Qwen3-8b 也需要 16~24GB显存,所以下载了 Qwen3-0.6B
+# 下载了 Qwen3-0.6B
 modelscope download --model Qwen/Qwen3-0.6B --local_dir /home/ss/vllm-py12/qwen3-06b
 
-# 运行 Qwen3-8b
-vllm serve /home/ss/vllm-py12/qwen3-06b \
+# 运行 Qwen3-0.6B
+nohup vllm serve /home/ss/vllm-py12/qwen3-06b \
   --host 0.0.0.0 \
   --port 8000 \
   --served-model-name Qwen3-0.6B \
@@ -57,5 +47,14 @@ vllm serve /home/ss/vllm-py12/qwen3-06b \
   --dtype auto \
   --gpu-memory-utilization 0.9 \
   --max-model-len 32768 \
- --trust-remote-code
+ --trust-remote-code \
+ >> /home/ss/vllm-py12/vllm.log 2>&1 \
+ & echo $! > /home/ss/vllm-py12/vllm.pid
+
+# 安装了抓包工具 tshark 和 ngrep
+sudo apt install ngrep
+sudo apt-get install tshark
+
+# 运行了1个定时任务脚本
+
 ```
\ No newline at end of file