更新 20251014.md
This commit is contained in:
parent
ddbee9f81f
commit
32bd09d0fe
130
20251014.md
130
20251014.md
@ -62,133 +62,3 @@ vllm serve /home/ss/vllm-py12/qwen3-06b \
|
||||
--trust-remote-code
|
||||
|
||||
```
|
||||
#### 新建了一个脚本去测试结构化输出函数的bug
|
||||
|
||||
```shell
|
||||
vim /home/ss/vllm-py12/vllm-crash-test.py
|
||||
```
|
||||
|
||||
```python
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel
|
||||
from vllm import LLM, SamplingParams
|
||||
from vllm.sampling_params import GuidedDecodingParams
|
||||
|
||||
# Define the structured-output schema.
class CarType(str, Enum):
    """Closed set of car body types allowed in the structured output.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    sedan = "sedan"
    suv = "SUV"
    truck = "Truck"
    coupe = "Coupe"
|
||||
|
||||
class CarDescription(BaseModel):
    """Pydantic model describing one car: brand, model name and body type."""

    # Manufacturer name.
    brand: str
    # Model name of the car.
    model: str
    # Body type, restricted to the CarType members.
    car_type: CarType
|
||||
|
||||
# Derive the JSON schema from the pydantic model.
# NOTE(review): json_schema is not referenced anywhere else in the visible
# script; the request in main() deliberately carries no schema.
json_schema = CarDescription.model_json_schema()
|
||||
|
||||
# Prompt sent to the model; the two adjacent literals are concatenated
# into a single string.
prompt = (
    "Generate a JSON with the brand, model and car_type of "
    "the most iconic car from the 90's"
)
|
||||
|
||||
def format_output(title: str, output: str) -> None:
    """Print ``title: output`` framed by a 50-dash rule above and below.

    Args:
        title: Label printed before the colon.
        output: Text printed after the label.
    """
    print(f"{'-' * 50}\n{title}: {output}\n{'-' * 50}")
|
||||
|
||||
def main() -> None:
    """Reproduce the crash triggered by an empty guided-decoding request.

    Loads the local model, builds a ``GuidedDecodingParams`` in which no
    constraint field is set, and submits one generation request with it.
    Any exception raised by the request is caught and printed; nothing is
    returned.
    """
    # 1. Initialise a local LLM from the model files on disk.
    llm = LLM(
        model="/home/ss/vllm-py12/qwen3-06b",  # path to the local model
        max_model_len=1024,
        enable_prefix_caching=True,
        gpu_memory_utilization=0.9,
    )

    # 2. Build an invalid guided_decoding object: no field carries a
    #    constraint. This is meant to make get_structured_output_key()
    #    raise ValueError.
    guided_decoding_invalid = GuidedDecodingParams(
        json=None,
        json_object=False,
        regex=None,
        choice=None,
        grammar=None,
        structural_tag=None,
    )

    sampling_params = SamplingParams(
        temperature=0.0,
        max_tokens=512,
        guided_decoding=guided_decoding_invalid,  # passed, but has no valid field
    )

    # 3. Generate output (expected to trigger the ValueError).
    try:
        outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)
        for output in outputs:
            generated_text = output.outputs[0].text
            format_output("Output", generated_text)
    except Exception as e:
        # Deliberately broad: in the recorded run the ValueError is raised
        # inside the EngineCore process, and what surfaces here is the
        # engine-failure error rather than the ValueError itself.
        print(f"Caught expected error: {e}")
|
||||
|
||||
# Run the reproduction only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
```
|
||||
|
||||
#### 复现
|
||||
|
||||
```shell
|
||||
python /home/ss/vllm-py12/vllm-crash-test.py
|
||||
```
|
||||
|
||||
```text
|
||||
/home/ss/vllm-py12/vllm-crash-test.py:50: DeprecationWarning: guided_decoding is deprecated. This will be removed in v0.12.0 or v1.0.0, which ever is soonest. Please use structured_outputs instead.
|
||||
sampling_params = SamplingParams(
|
||||
Adding requests: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 93.46it/s]
|
||||
Processed prompts: 0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s](EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] EngineCore encountered a fatal error.
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] Traceback (most recent call last):
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 701, in run_engine_core
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] engine_core.run_busy_loop()
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 728, in run_busy_loop
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] self._process_engine_step()
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 754, in _process_engine_step
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] outputs, model_executed = self.step_fn()
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 283, in step
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] scheduler_output = self.scheduler.schedule()
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/core/sched/scheduler.py", line 359, in schedule
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] if structured_output_req and structured_output_req.grammar:
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/request.py", line 45, in grammar
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] completed = self._check_grammar_completion()
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/request.py", line 33, in _check_grammar_completion
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] self._grammar = self._grammar.result(timeout=0.0001)
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/concurrent/futures/_base.py", line 449, in result
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] return self.__get_result()
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] raise self._exception
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/concurrent/futures/thread.py", line 59, in run
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] result = self.fn(*self.args, **self.kwargs)
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/__init__.py", line 128, in _async_create_grammar
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] key = request.structured_output_request.structured_output_key # type: ignore[union-attr]
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/functools.py", line 998, in __get__
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] val = self.func(instance)
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/request.py", line 58, in structured_output_key
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] return get_structured_output_key(self.sampling_params)
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/request.py", line 86, in get_structured_output_key
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] raise ValueError("No valid structured output parameter found")
|
||||
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ValueError: No valid structured output parameter found
|
||||
Caught expected error: EngineCore encountered an issue. See stack trace (above) for the root cause.
|
||||
Processed prompts: 0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]
|
||||
(vllm-py12) root@ss-IdeaPad-PC:/home/ss/vllm-py12#
|
||||
```
|
||||
Loading…
Reference in New Issue
Block a user