更新 20251011.md
This commit is contained in:
parent
495e1e5786
commit
b79bd8bbc4
285
20251011.md
285
20251011.md
@ -2,292 +2,21 @@
|
|||||||
|
|
||||||
## 问题原因
|
## 问题原因
|
||||||
|
|
||||||
vllm 在处理请求时调用了 get_structured_output_key 函数。该函数在处理 structured_outputs 参数时,由于参数不满足任何一个分支条件,抛出了**No valid structured output parameter found**异常。该异常未被 EngineCore 捕获,导致引擎崩溃;APIServer 发现引擎进程已死后,自己也退出了进程。
|
外部调用 vllm 的 OpenAI API 服务时,传入的请求参数使 vllm 调用了**get_structured_output_key**函数。这些参数在该函数里不能被正确处理,抛出了**No valid structured output parameter found**异常,该异常导致 vllm 的 EngineCore 和 APIServer 进程死亡。
|
||||||
|
|
||||||
## 使用抓包工具 tshark 监控 http 请求
|
|
||||||
|
|
||||||
### 在合适的目录下执行下面的命令,tshark 需要 root 权限,请使用 root 账号执行
|
|
||||||
|
|
||||||
##### 安装 tshark(已执行完毕)
|
|
||||||
```
|
|
||||||
sudo apt-get install tshark
|
|
||||||
```
|
|
||||||
|
|
||||||
##### 创建一个 shell 脚本
|
|
||||||
```
|
|
||||||
vim hook_vllm_gpt-oss-120b.sh
|
|
||||||
```
|
|
||||||
脚本内容如下:
|
|
||||||
```
|
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# ========================================
|
|
||||||
# 监控本地 v1/chat/completions 接口的 HTTP 请求与响应
|
|
||||||
# 仅保留最近 20 条日志(含流式响应,如 SSE)
|
|
||||||
# 使用 tshark + TCP 流跟踪
|
|
||||||
# ========================================
|
|
||||||
|
|
||||||
# 配置
|
|
||||||
INTERFACE="lo" # 本地回环接口
|
|
||||||
PORT="8077"
|
|
||||||
ENDPOINT="/v1/chat/completions"
|
|
||||||
LOG_FILE="/hook/chat_completions.log"
|
|
||||||
TEMP_LOG="/hook/chat_completions.tmp"
|
|
||||||
PID_FILE="/hook/hook_vllm_gpt-oss-120b.pid"
|
|
||||||
|
|
||||||
# 检查是否已运行
|
|
||||||
if [ -f "$PID_FILE" ]; then
|
|
||||||
if ps -p $(cat "$PID_FILE") > /dev/null 2>&1; then
|
|
||||||
echo "【错误】监控脚本已在运行 (PID: $(cat $PID_FILE))"
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
rm -f "$PID_FILE"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# 记录 PID
|
|
||||||
echo $$ > "$PID_FILE"
|
|
||||||
|
|
||||||
# 清空日志
|
|
||||||
> "$LOG_FILE"
|
|
||||||
|
|
||||||
echo "✅ 开始监控 http://127.0.0.1:$PORT$ENDPOINT"
|
|
||||||
echo "📝 日志将保存到: $LOG_FILE"
|
|
||||||
echo "⏳ 仅保留最近 20 条,按 Ctrl+C 停止"
|
|
||||||
|
|
||||||
# 信号处理:清理 PID 文件
|
|
||||||
trap 'rm -f "$PID_FILE"; echo "⏹️ 监控已停止"; exit 0' SIGINT SIGTERM
|
|
||||||
|
|
||||||
# 使用 tshark 跟踪 TCP 流
|
|
||||||
sudo tshark \
|
|
||||||
-i "$INTERFACE" \
|
|
||||||
-f "tcp port $PORT and host 127.0.0.1" \
|
|
||||||
-q \
|
|
||||||
-z "follow,tcp,ascii" \
|
|
||||||
2>/dev/null | \
|
|
||||||
stdbuf -oL awk -v endpoint="$ENDPOINT" -v log="$LOG_FILE" -v temp="$TEMP" '
|
|
||||||
BEGIN {
|
|
||||||
RS = "\n\n"
|
|
||||||
ORS = ""
|
|
||||||
in_request = 0
|
|
||||||
buffer = ""
|
|
||||||
count = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# 分割流,识别每条 TCP 流
|
|
||||||
{
|
|
||||||
if (match($0, /GET|POST|PUT|DELETE|HTTP/) && index($0, endpoint)) {
|
|
||||||
# 提取时间戳(tshark 输出第一行包含时间)
|
|
||||||
if (match($0, /Following.*on port [0-9]+$/)) {
|
|
||||||
ts = substr($0, RSTART, RLENGTH)
|
|
||||||
gsub(/.*on/, "on", ts)
|
|
||||||
} else {
|
|
||||||
ts = "unknown time"
|
|
||||||
}
|
|
||||||
|
|
||||||
# 提取请求行和头
|
|
||||||
split($0, lines, /\n/)
|
|
||||||
for (i=1; i<=length(lines); i++) {
|
|
||||||
if (lines[i] ~ /(GET|POST|PUT|DELETE)/ && index(lines[i], endpoint)) {
|
|
||||||
request_line = lines[i]
|
|
||||||
}
|
|
||||||
if (lines[i] ~ /Content-Type:/ || lines[i] ~ /Authorization:/ || lines[i] ~ /User-Agent:/) {
|
|
||||||
headers = headers "\n " lines[i]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# 提取请求体(通常在空行后)
|
|
||||||
body = ""
|
|
||||||
in_body = 0
|
|
||||||
for (i=1; i<=length(lines); i++) {
|
|
||||||
if (lines[i] == "" || lines[i] ~ /HTTP\/[0-9.]/) {
|
|
||||||
in_body = 1
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if (in_body && lines[i] !~ /(No response found|Following)/) {
|
|
||||||
body = body lines[i] "\n"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# 提取响应部分(HTTP/ 开头)
|
|
||||||
response = ""
|
|
||||||
for (i=1; i<=length(lines); i++) {
|
|
||||||
if (lines[i] ~ /^HTTP\// && i > 1) {
|
|
||||||
for (j=i; j<=length(lines); j++) {
|
|
||||||
if (lines[j] !~ /Following/) {
|
|
||||||
response = response lines[j] "\n"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# 构造日志条目
|
|
||||||
entry = "========================================\n"
|
|
||||||
entry = entry "🕒 " ts "\n"
|
|
||||||
entry = entry "📤 请求: " request_line "\n"
|
|
||||||
if (headers != "") {
|
|
||||||
entry = entry "📎 头部:\n" headers "\n"
|
|
||||||
}
|
|
||||||
if (body != "") {
|
|
||||||
entry = entry "📦 请求体:\n" body "\n"
|
|
||||||
}
|
|
||||||
if (response != "") {
|
|
||||||
entry = entry "📥 响应:\n" response "\n"
|
|
||||||
}
|
|
||||||
entry = entry "========================================\n\n"
|
|
||||||
|
|
||||||
# 写入日志并保留最近 20 条
|
|
||||||
cmd = "echo -e \"" entry "\" >> " log " && tail -n 200 " log " > " temp " && mv " temp " " log
|
|
||||||
system(cmd)
|
|
||||||
|
|
||||||
# 重置
|
|
||||||
headers = ""
|
|
||||||
body = ""
|
|
||||||
response = ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
'
|
|
||||||
|
|
||||||
# 正常退出时清理
|
|
||||||
rm -f "$PID_FILE"
|
|
||||||
```
|
|
||||||
|
|
||||||
##### 赋予执行权限
|
|
||||||
```
|
|
||||||
chmod +x hook_vllm_gpt-oss-120b.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
##### 后台运行脚本
|
|
||||||
```
|
|
||||||
nohup /hook/hook_vllm_gpt-oss-120b.sh > /dev/null 2>&1 &
|
|
||||||
```
|
|
||||||
|
|
||||||
##### 查看请求日志
|
|
||||||
|
|
||||||
```
|
|
||||||
tail -f /hook/chat_completions.log
|
|
||||||
```
|
|
||||||
|
|
||||||
##### 停止脚本
|
|
||||||
|
|
||||||
```
|
|
||||||
pkill -f hook_vllm_gpt-oss-120b.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
### 函数 get_structured_output_key 实现
|
|
||||||
|
|
||||||
#### 分支:release/v0.11.0
|
|
||||||
|
|
||||||
|
## 源码出处
|
||||||
https://github.com/vllm-project/vllm/blob/releases/v0.11.0/vllm/v1/structured_output/request.py
|
https://github.com/vllm-project/vllm/blob/releases/v0.11.0/vllm/v1/structured_output/request.py
|
||||||
|
|
||||||
```python
|
|
||||||
def get_structured_output_key(sampling_params: SamplingParams) -> StructuredOutputKey:
|
|
||||||
params = sampling_params.structured_outputs
|
|
||||||
assert params is not None, "params can't be None."
|
|
||||||
if params.json is not None:
|
|
||||||
if not isinstance(params.json, str):
|
|
||||||
json_str = json.dumps(params.json)
|
|
||||||
else:
|
|
||||||
json_str = params.json
|
|
||||||
return (StructuredOutputOptions.JSON, json_str)
|
|
||||||
elif params.json_object:
|
|
||||||
return (StructuredOutputOptions.JSON_OBJECT, "")
|
|
||||||
elif params.regex is not None:
|
|
||||||
return (StructuredOutputOptions.REGEX, params.regex)
|
|
||||||
elif params.choice is not None:
|
|
||||||
if not isinstance(params.choice, str):
|
|
||||||
json_str = json.dumps(params.choice)
|
|
||||||
else:
|
|
||||||
json_str = params.choice
|
|
||||||
return (StructuredOutputOptions.CHOICE, json_str)
|
|
||||||
elif params.grammar is not None:
|
|
||||||
return (StructuredOutputOptions.GRAMMAR, params.grammar)
|
|
||||||
elif params.structural_tag is not None:
|
|
||||||
return (StructuredOutputOptions.STRUCTURAL_TAG, params.structural_tag)
|
|
||||||
else:
|
|
||||||
raise ValueError("No valid structured output parameter found")
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 分支:release/v0.10.2
|
## 问题追踪
|
||||||
|
|
||||||
https://github.com/vllm-project/vllm/blob/releases/v0.10.2/vllm/v1/structured_output/request.py
|
由于 vllm 没有提供 http 请求参数的日志打印,也没有集成监控 http 请求的第三方工具,所以在 Ubuntu 上安装了 tshark 抓包工具。通过 Java 脚本启动 tshark 命令,并将 tshark 抓到的请求内容写入磁盘文件。下一次 vllm 崩溃时,可以根据磁盘文件中保存的内容,分析是什么参数导致了**get_structured_output_key**的异常。
|
||||||
|
|
||||||
```python
|
> 使用 Java 执行 tshark 是为了抹掉 Linux 和 Windows 的平台差异,不用修改代码和命令即可直接运行。
|
||||||
def get_structured_output_key(
|
|
||||||
sampling_params: SamplingParams) -> StructuredOutputKey:
|
|
||||||
params = sampling_params.guided_decoding
|
|
||||||
assert params is not None, "params can't be None."
|
|
||||||
if params.json is not None:
|
|
||||||
if not isinstance(params.json, str):
|
|
||||||
json_str = json.dumps(params.json)
|
|
||||||
else:
|
|
||||||
json_str = params.json
|
|
||||||
return (StructuredOutputOptions.JSON, json_str)
|
|
||||||
elif params.json_object:
|
|
||||||
return (StructuredOutputOptions.JSON_OBJECT, "")
|
|
||||||
elif params.regex is not None:
|
|
||||||
return (StructuredOutputOptions.REGEX, params.regex)
|
|
||||||
elif params.choice is not None:
|
|
||||||
if not isinstance(params.choice, str):
|
|
||||||
json_str = json.dumps(params.choice)
|
|
||||||
else:
|
|
||||||
json_str = params.choice
|
|
||||||
return (StructuredOutputOptions.CHOICE, json_str)
|
|
||||||
elif params.grammar is not None:
|
|
||||||
return (StructuredOutputOptions.GRAMMAR, params.grammar)
|
|
||||||
elif params.structural_tag is not None:
|
|
||||||
return (StructuredOutputOptions.STRUCTURAL_TAG, params.structural_tag)
|
|
||||||
else:
|
|
||||||
raise ValueError("No valid structured output parameter found")
|
|
||||||
```
|
|
||||||
> 两个版本的唯一区别是读取的`SamplingParams`属性不同(v0.11.0 读取 `structured_outputs`,v0.10.2 读取 `guided_decoding`),其它判断逻辑完全一致。
|
|
||||||
|
|
||||||
### SamplingParams
|
> Java 脚本内容:
|
||||||
|
|
||||||
https://github.com/vllm-project/vllm/blob/releases/v0.11.0/vllm/sampling_params.py
|
## 崩溃日志
|
||||||
|
|
||||||
```python
|
|
||||||
# 其它参数已省略
|
|
||||||
class SamplingParams(
|
|
||||||
💡初始化逻辑是没问题的,默认值是None
|
|
||||||
structured_outputs: Optional[StructuredOutputsParams] = None
|
|
||||||
guided_decoding: Optional[GuidedDecodingParams] = None
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def from_optional(
|
|
||||||
💡默认值也是None
|
|
||||||
structured_outputs: Optional[StructuredOutputsParams] = None,
|
|
||||||
) -> "SamplingParams":
|
|
||||||
if guided_decoding is not None:
|
|
||||||
warnings.warn(
|
|
||||||
"guided_decoding is deprecated. This will be removed in "
|
|
||||||
💡官方计划在 v0.12.0 或 v1.0.0(以先到者为准)移除 guided_decoding 参数,改用 structured_outputs 参数替代;v0.11.0 版本对旧参数做了兼容。
|
|
||||||
"v0.12.0 or v1.0.0, which ever is soonest. Please use "
|
|
||||||
"structured_outputs instead.",
|
|
||||||
DeprecationWarning,
|
|
||||||
stacklevel=2)
|
|
||||||
structured_outputs = guided_decoding
|
|
||||||
guided_decoding = None
|
|
||||||
return SamplingParams(
|
|
||||||
structured_outputs=structured_outputs,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### StructuredOutputOptions
|
|
||||||
|
|
||||||
```python
|
|
||||||
class StructuredOutputOptions(enum.Enum):
|
|
||||||
JSON = enum.auto()
|
|
||||||
JSON_OBJECT = enum.auto()
|
|
||||||
REGEX = enum.auto()
|
|
||||||
GRAMMAR = enum.auto()
|
|
||||||
CHOICE = enum.auto()
|
|
||||||
STRUCTURAL_TAG = enum.auto()
|
|
||||||
```
|
|
||||||
> 💡只支持这6种类型,每个类型都对应 structured_outputs 下面的一个不同的参数。
|
|
||||||
|
|
||||||
## 崩溃日志片段
|
|
||||||
|
|
||||||
```text
|
```text
|
||||||
[1;36m(EngineCore_DP0 pid=2738693)[0;0m ERROR 10-10 10:43:10 [core.py:710] EngineCore encountered a fatal error.
|
[1;36m(EngineCore_DP0 pid=2738693)[0;0m ERROR 10-10 10:43:10 [core.py:710] EngineCore encountered a fatal error.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user