From c53844f93e03c4b59ec95578b292b5e4953b9f75 Mon Sep 17 00:00:00 2001
From: 8ga
Date: Sat, 11 Oct 2025 13:52:50 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=2020251011.md?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 20251011.md | 605 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 565 insertions(+), 40 deletions(-)

diff --git a/20251011.md b/20251011.md
index 99e752d..7c64943 100644
--- a/20251011.md
+++ b/20251011.md
@@ -1,52 +1,577 @@
-## Crash cause
+## Source code trace
 
-- A request carrying structured-output intent was sent to the vLLM service.
-- While handling it, vLLM entered the structured_output_request.structured_output_key logic and eventually called get_structured_output_key(sampling_params).
-- That function walks the candidate parameters on sampling_params (grammar, json_schema, regex, etc.) but found no valid structured-output parameter.
-- It therefore raised ValueError("No valid structured output parameter found").
-- The ValueError was raised in a background thread and never caught, crashing the EngineCore process (PID 2738693).
-- The main API service (APIServer) detected the EngineCore crash, raised EngineDeadError, and the whole service terminated.
+### get_structured_output_key implementation
 
-## Solution 1
+#### Branch: release/v0.11.0
 
-Make sure every request that reaches vLLM provides a valid structured-output parameter, for example:
-
-```json
-{
-    "prompt": "Generate a user profile",
-    "structured_output": {
-        "type": "json",
-        "schema": {
-            "type": "object",
-            "properties": {
-                "name": {"type": "string"},
-                "age": {"type": "integer"}
-            },
-            "required": ["name", "age"]
-        }
-    }
-}
+```python
+# https://github.com/vllm-project/vllm/blob/releases/v0.11.0/vllm/v1/structured_output/request.py
+def get_structured_output_key(sampling_params: SamplingParams) -> StructuredOutputKey:
+    params = sampling_params.structured_outputs
+    assert params is not None, "params can't be None."
+    # Crash path: structured_outputs satisfies none of the conditions below
+    if params.json is not None:
+        if not isinstance(params.json, str):
+            json_str = json.dumps(params.json)
+        else:
+            json_str = params.json
+        return (StructuredOutputOptions.JSON, json_str)
+    elif params.json_object:
+        return (StructuredOutputOptions.JSON_OBJECT, "")
+    elif params.regex is not None:
+        return (StructuredOutputOptions.REGEX, params.regex)
+    elif params.choice is not None:
+        if not isinstance(params.choice, str):
+            json_str = json.dumps(params.choice)
+        else:
+            json_str = params.choice
+        return (StructuredOutputOptions.CHOICE, json_str)
+    elif params.grammar is not None:
+        return (StructuredOutputOptions.GRAMMAR, params.grammar)
+    elif params.structural_tag is not None:
+        return (StructuredOutputOptions.STRUCTURAL_TAG, params.structural_tag)
+    else:
+        # This is the error that was ultimately raised
+        raise ValueError("No valid structured output parameter found")
 ```
 
-##### Invalid example
+#### Branch: release/v0.10.2
 
-```json
-{
-    "prompt": "Generate a JSON object",
-    "structured_output": {}
-}
+```python
+# https://github.com/vllm-project/vllm/blob/releases/v0.10.2/vllm/v1/structured_output/request.py
+def get_structured_output_key(
+        sampling_params: SamplingParams) -> StructuredOutputKey:
+    params = sampling_params.guided_decoding
+    assert params is not None, "params can't be None."
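+    # --- Editor's annotation (not part of the upstream file) ----------------
+    # v0.10.2 still reads the settings from `guided_decoding`; v0.11.0 above
+    # reads the renamed `structured_outputs` field instead. In both versions
+    # the branch chain below decides the crash. A hypothetical repro sketch,
+    # assuming a GuidedDecodingParams object with every field left unset:
+    #
+    #     sp = SamplingParams(guided_decoding=GuidedDecodingParams())
+    #     get_structured_output_key(sp)
+    #
+    # matches none of the branches and falls through to the final else, which
+    # raises ValueError("No valid structured output parameter found").
+    # -------------------------------------------------------------------------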
+ if params.json is not None: + if not isinstance(params.json, str): + json_str = json.dumps(params.json) + else: + json_str = params.json + return (StructuredOutputOptions.JSON, json_str) + elif params.json_object: + return (StructuredOutputOptions.JSON_OBJECT, "") + elif params.regex is not None: + return (StructuredOutputOptions.REGEX, params.regex) + elif params.choice is not None: + if not isinstance(params.choice, str): + json_str = json.dumps(params.choice) + else: + json_str = params.choice + return (StructuredOutputOptions.CHOICE, json_str) + elif params.grammar is not None: + return (StructuredOutputOptions.GRAMMAR, params.grammar) + elif params.structural_tag is not None: + return (StructuredOutputOptions.STRUCTURAL_TAG, params.structural_tag) + else: + raise ValueError("No valid structured output parameter found") ``` -```json -{ - "prompt": "生成一个JSON", - "json_schema": null -} +### SamplingParams 源码 + +#### 分支:release/v0.11.0 + +```python + +```python +# https://github.com/vllm-project/vllm/blob/releases/v0.11.0/vllm/sampling_params.py +class SamplingParams( + msgspec.Struct, + omit_defaults=True, # type: ignore[call-arg] + # required for @cached_property. + dict=True): # type: ignore[call-arg] + """Sampling parameters for text generation. + + Overall, we follow the sampling parameters from the OpenAI text completion + API (https://platform.openai.com/docs/api-reference/completions/create). + In addition, we support beam search, which is not supported by OpenAI. + """ + + n: int = 1 + """Number of outputs to return for the given prompt request. + + NOTE: + `AsyncLLM` streams outputs by default. When `n > 1`, all `n` outputs + are generated and streamed cumulatively per request. To see all `n` + outputs upon completion, use `output_kind=RequestOutputKind.FINAL_ONLY` + in `SamplingParams`.""" + best_of: Optional[int] = None + """Number of output sequences that are generated from the prompt. From + these `best_of` sequences, the top `n` sequences are returned. `best_of` + must be greater than or equal to `n`. By default, `best_of` is set to `n`. + Warning, this is only supported in V0.""" + _real_n: Optional[int] = None + presence_penalty: float = 0.0 + """Penalizes new tokens based on whether they appear in the generated text + so far. Values > 0 encourage the model to use new tokens, while values < 0 + encourage the model to repeat tokens.""" + frequency_penalty: float = 0.0 + """Penalizes new tokens based on their frequency in the generated text so + far. Values > 0 encourage the model to use new tokens, while values < 0 + encourage the model to repeat tokens.""" + repetition_penalty: float = 1.0 + """Penalizes new tokens based on whether they appear in the prompt and the + generated text so far. Values > 1 encourage the model to use new tokens, + while values < 1 encourage the model to repeat tokens.""" + temperature: float = 1.0 + """Controls the randomness of the sampling. Lower values make the model + more deterministic, while higher values make the model more random. Zero + means greedy sampling.""" + top_p: float = 1.0 + """Controls the cumulative probability of the top tokens to consider. Must + be in (0, 1]. Set to 1 to consider all tokens.""" + top_k: int = 0 + """Controls the number of top tokens to consider. Set to 0 (or -1) to + consider all tokens.""" + min_p: float = 0.0 + """Represents the minimum probability for a token to be considered, + relative to the probability of the most likely token. Must be in [0, 1]. 
+ Set to 0 to disable this.""" + seed: Optional[int] = None + """Random seed to use for the generation.""" + stop: Optional[Union[str, list[str]]] = None + """String(s) that stop the generation when they are generated. The returned + output will not contain the stop strings.""" + stop_token_ids: Optional[list[int]] = None + """Token IDs that stop the generation when they are generated. The returned + output will contain the stop tokens unless the stop tokens are special + tokens.""" + ignore_eos: bool = False + """Whether to ignore the EOS token and continue generating + tokens after the EOS token is generated.""" + max_tokens: Optional[int] = 16 + """Maximum number of tokens to generate per output sequence.""" + min_tokens: int = 0 + """Minimum number of tokens to generate per output sequence before EOS or + `stop_token_ids` can be generated""" + logprobs: Optional[int] = None + """Number of log probabilities to return per output token. When set to + `None`, no probability is returned. If set to a non-`None` value, the + result includes the log probabilities of the specified number of most + likely tokens, as well as the chosen tokens. Note that the implementation + follows the OpenAI API: The API will always return the log probability of + the sampled token, so there may be up to `logprobs+1` elements in the + response. When set to -1, return all `vocab_size` log probabilities.""" + prompt_logprobs: Optional[int] = None + """Number of log probabilities to return per prompt token. + When set to -1, return all `vocab_size` log probabilities.""" + # NOTE: This parameter is only exposed at the engine level for now. + # It is not exposed in the OpenAI API server, as the OpenAI API does + # not support returning only a list of token IDs. + detokenize: bool = True + """Whether to detokenize the output.""" + skip_special_tokens: bool = True + """Whether to skip special tokens in the output.""" + spaces_between_special_tokens: bool = True + """Whether to add spaces between special tokens in the output.""" + # Optional[list[LogitsProcessor]] type. We use Any here because + # Optional[list[LogitsProcessor]] type is not supported by msgspec. + logits_processors: Optional[Any] = None + """Functions that modify logits based on previously generated tokens, and + optionally prompt tokens as a first argument.""" + include_stop_str_in_output: bool = False + """Whether to include the stop strings in output text.""" + truncate_prompt_tokens: Optional[Annotated[int, + msgspec.Meta(ge=-1)]] = None + """If set to -1, will use the truncation size supported by the model. If + set to an integer k, will use only the last k tokens from the prompt + (i.e., left truncation). If set to `None`, truncation is disabled.""" + output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE + + # The below fields are not supposed to be used as an input. + # They are set in post_init. 
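+    # --- Editor's annotation (not part of the upstream file) ----------------
+    # The two fields most relevant to this trace are declared a few lines
+    # below: `structured_outputs` (the new name) and `guided_decoding` (a
+    # deprecated alias that __post_init__ remaps onto `structured_outputs`).
+    # A request only avoids the ValueError traced above when one of them
+    # carries a usable constraint, e.g. (hypothetical sketch, not a verified
+    # call):
+    #
+    #     SamplingParams(
+    #         structured_outputs=StructuredOutputsParams(
+    #             json={"type": "object",
+    #                   "properties": {"name": {"type": "string"}},
+    #                   "required": ["name"]}))
+    #
+    # -------------------------------------------------------------------------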
+ output_text_buffer_length: int = 0 + _all_stop_token_ids: set[int] = msgspec.field(default_factory=set) + + # Fields used to construct logits processors + structured_outputs: Optional[StructuredOutputsParams] = None + """Parameters for configuring structured outputs.""" + guided_decoding: Optional[GuidedDecodingParams] = None + """Deprecated alias for structured_outputs.""" + logit_bias: Optional[dict[int, float]] = None + """If provided, the engine will construct a logits processor that applies + these logit biases.""" + allowed_token_ids: Optional[list[int]] = None + """If provided, the engine will construct a logits processor which only + retains scores for the given token ids.""" + extra_args: Optional[dict[str, Any]] = None + """Arbitrary additional args, that can be used by custom sampling + implementations, plugins, etc. Not used by any in-tree sampling + implementations.""" + + # Fields used for bad words + bad_words: Optional[list[str]] = None + """Words that are not allowed to be generated. More precisely, only the + last token of a corresponding token sequence is not allowed when the next + generated token can complete the sequence.""" + _bad_words_token_ids: Optional[list[list[int]]] = None + + @staticmethod + def from_optional( + n: Optional[int] = 1, + best_of: Optional[int] = None, + presence_penalty: Optional[float] = 0.0, + frequency_penalty: Optional[float] = 0.0, + repetition_penalty: Optional[float] = 1.0, + temperature: Optional[float] = 1.0, + top_p: Optional[float] = 1.0, + top_k: int = 0, + min_p: float = 0.0, + seed: Optional[int] = None, + stop: Optional[Union[str, list[str]]] = None, + stop_token_ids: Optional[list[int]] = None, + bad_words: Optional[list[str]] = None, + include_stop_str_in_output: bool = False, + ignore_eos: bool = False, + max_tokens: Optional[int] = 16, + min_tokens: int = 0, + logprobs: Optional[int] = None, + prompt_logprobs: Optional[int] = None, + detokenize: bool = True, + skip_special_tokens: bool = True, + spaces_between_special_tokens: bool = True, + logits_processors: Optional[list[LogitsProcessor]] = None, + truncate_prompt_tokens: Optional[Annotated[int, + msgspec.Meta( + ge=-1)]] = None, + output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE, + structured_outputs: Optional[StructuredOutputsParams] = None, + guided_decoding: Optional[GuidedDecodingParams] = None, + logit_bias: Optional[Union[dict[int, float], dict[str, float]]] = None, + allowed_token_ids: Optional[list[int]] = None, + extra_args: Optional[dict[str, Any]] = None, + ) -> "SamplingParams": + if logit_bias is not None: + # Convert token_id to integer + # Clamp the bias between -100 and 100 per OpenAI API spec + logit_bias = { + int(token): min(100.0, max(-100.0, bias)) + for token, bias in logit_bias.items() + } + if guided_decoding is not None: + warnings.warn( + "guided_decoding is deprecated. This will be removed in " + "v0.12.0 or v1.0.0, which ever is soonest. 
Please use " + "structured_outputs instead.", + DeprecationWarning, + stacklevel=2) + structured_outputs = guided_decoding + guided_decoding = None + + return SamplingParams( + n=1 if n is None else n, + best_of=best_of, + presence_penalty=0.0 + if presence_penalty is None else presence_penalty, + frequency_penalty=0.0 + if frequency_penalty is None else frequency_penalty, + repetition_penalty=1.0 + if repetition_penalty is None else repetition_penalty, + temperature=1.0 if temperature is None else temperature, + top_p=1.0 if top_p is None else top_p, + top_k=top_k, + min_p=min_p, + seed=seed, + stop=stop, + stop_token_ids=stop_token_ids, + bad_words=bad_words, + include_stop_str_in_output=include_stop_str_in_output, + ignore_eos=ignore_eos, + max_tokens=max_tokens, + min_tokens=min_tokens, + logprobs=logprobs, + prompt_logprobs=prompt_logprobs, + detokenize=detokenize, + skip_special_tokens=skip_special_tokens, + spaces_between_special_tokens=spaces_between_special_tokens, + logits_processors=logits_processors, + truncate_prompt_tokens=truncate_prompt_tokens, + output_kind=output_kind, + structured_outputs=structured_outputs, + logit_bias=logit_bias, + allowed_token_ids=allowed_token_ids, + extra_args=extra_args, + ) + + def __post_init__(self) -> None: + # how we deal with `best_of``: + # if `best_of`` is not set, we default to `n`; + # if `best_of`` is set, we set `n`` to `best_of`, + # and set `_real_n`` to the original `n`. + # when we return the result, we will check + # if we need to return `n` or `_real_n` results + if self.best_of: + if self.best_of < self.n: + raise ValueError( + f"best_of must be greater than or equal to n, " + f"got n={self.n} and best_of={self.best_of}.") + if not self._real_n: + self._real_n = self.n + self.n = self.best_of + + if 0 < self.temperature < _MAX_TEMP: + logger.warning( + "temperature %s is less than %s, which may cause numerical " + "errors nan or inf in tensors. We have maxed it out to %s.", + self.temperature, _MAX_TEMP, _MAX_TEMP) + self.temperature = max(self.temperature, _MAX_TEMP) + + if self.seed == -1: + self.seed = None + + if self.stop is None: + self.stop = [] + elif isinstance(self.stop, str): + self.stop = [self.stop] + + if self.stop_token_ids is None: + self.stop_token_ids = [] + + if self.bad_words is None: + self.bad_words = [] + + if self.logprobs is True: + self.logprobs = 1 + + if self.prompt_logprobs is True: + self.prompt_logprobs = 1 + + # Number of characters to hold back for stop string evaluation + # until sequence is finished. + if self.stop and not self.include_stop_str_in_output: + self.output_text_buffer_length = max(len(s) for s in self.stop) - 1 + + self._verify_args() + + if self.temperature < _SAMPLING_EPS: + # Zero temperature means greedy sampling. + self.top_p = 1.0 + self.top_k = 0 + self.min_p = 0.0 + self._verify_greedy_sampling() + + # eos_token_id is added to this by the engine + self._all_stop_token_ids.update(self.stop_token_ids) + + if self.guided_decoding is not None: + warnings.warn( + "guided_decoding is deprecated. This will be removed in " + "v0.12.0 or v1.0.0, which ever is soonest. 
Please use " + "structured_outputs instead.", + DeprecationWarning, + stacklevel=2) + self.structured_outputs = self.guided_decoding + self.guided_decoding = None + + def _verify_args(self) -> None: + if not isinstance(self.n, int): + raise ValueError(f"n must be an int, but is of " + f"type {type(self.n)}") + if self.n < 1: + raise ValueError(f"n must be at least 1, got {self.n}.") + if self.best_of is not None: + if not isinstance(self.best_of, int): + raise ValueError( + f"best_of must be an integer, got {type(self.best_of)}") + if self.best_of < 1: + raise ValueError( + f"best_of must be at least 1, got {self.best_of}") + if self.best_of < self.n: + raise ValueError( + f"best_of must be greater than or equal to n, " + f"got n={self.n} and best_of={self.best_of}.") + if not -2.0 <= self.presence_penalty <= 2.0: + raise ValueError("presence_penalty must be in [-2, 2], got " + f"{self.presence_penalty}.") + if not -2.0 <= self.frequency_penalty <= 2.0: + raise ValueError("frequency_penalty must be in [-2, 2], got " + f"{self.frequency_penalty}.") + if self.repetition_penalty <= 0.0: + raise ValueError( + "repetition_penalty must be greater than zero, got " + f"{self.repetition_penalty}.") + if self.temperature < 0.0: + raise ValueError( + f"temperature must be non-negative, got {self.temperature}.") + if not 0.0 < self.top_p <= 1.0: + raise ValueError(f"top_p must be in (0, 1], got {self.top_p}.") + # quietly accept -1 as disabled, but prefer 0 + if self.top_k < -1: + raise ValueError(f"top_k must be 0 (disable), or at least 1, " + f"got {self.top_k}.") + if not isinstance(self.top_k, int): + raise TypeError( + f"top_k must be an integer, got {type(self.top_k).__name__}") + if not 0.0 <= self.min_p <= 1.0: + raise ValueError("min_p must be in [0, 1], got " + f"{self.min_p}.") + if self.max_tokens is not None and self.max_tokens < 1: + raise ValueError( + f"max_tokens must be at least 1, got {self.max_tokens}.") + if self.min_tokens < 0: + raise ValueError(f"min_tokens must be greater than or equal to 0, " + f"got {self.min_tokens}.") + if self.max_tokens is not None and self.min_tokens > self.max_tokens: + raise ValueError( + f"min_tokens must be less than or equal to " + f"max_tokens={self.max_tokens}, got {self.min_tokens}.") + if (self.logprobs is not None and self.logprobs != -1 + and self.logprobs < 0): + raise ValueError( + f"logprobs must be non-negative or -1, got {self.logprobs}.") + if (self.prompt_logprobs is not None and self.prompt_logprobs != -1 + and self.prompt_logprobs < 0): + raise ValueError( + f"prompt_logprobs must be non-negative or -1, got " + f"{self.prompt_logprobs}.") + if (self.truncate_prompt_tokens is not None + and (self.truncate_prompt_tokens == 0 + or self.truncate_prompt_tokens < -1)): + raise ValueError( + f"truncate_prompt_tokens must be an integer >= 1 or -1, " + f"got {self.truncate_prompt_tokens}") + assert isinstance(self.stop_token_ids, list) + if not all(isinstance(st_id, int) for st_id in self.stop_token_ids): + raise ValueError(f"stop_token_ids must contain only integers, " + f"got {self.stop_token_ids}.") + assert isinstance(self.stop, list) + if any(not stop_str for stop_str in self.stop): + raise ValueError("stop cannot contain an empty string.") + if self.stop and not self.detokenize: + raise ValueError( + "stop strings are only supported when detokenize is True. 
" + "Set detokenize=True to use stop.") + if self.best_of != self._real_n and self.output_kind == ( + RequestOutputKind.DELTA): + raise ValueError("best_of must equal n to use output_kind=DELTA") + + def _verify_greedy_sampling(self) -> None: + if self.n > 1: + raise ValueError("n must be 1 when using greedy sampling, " + f"got {self.n}.") + + def update_from_generation_config( + self, + generation_config: dict[str, Any], + model_eos_token_id: Optional[int] = None) -> None: + """Update if there are non-default values from generation_config""" + + if model_eos_token_id is not None: + # Add the eos token id into the sampling_params to support + # min_tokens processing. + self._all_stop_token_ids.add(model_eos_token_id) + + # Update eos_token_id for generation + if (eos_ids := generation_config.get("eos_token_id")) is not None: + # it can be either int or list of int + eos_ids = {eos_ids} if isinstance(eos_ids, int) else set(eos_ids) + if model_eos_token_id is not None: + # We don't need to include the primary eos_token_id in + # stop_token_ids since it's handled separately for stopping + # purposes. + eos_ids.discard(model_eos_token_id) + if eos_ids: + self._all_stop_token_ids.update(eos_ids) + if not self.ignore_eos: + eos_ids.update(self.stop_token_ids) + self.stop_token_ids = list(eos_ids) + + def update_from_tokenizer(self, tokenizer: AnyTokenizer) -> None: + if not self.bad_words: + return + self._bad_words_token_ids = [] + for bad_word in self.bad_words: + # To prohibit words both at the beginning + # and in the middle of text + # (related to add_prefix_space tokenizer parameter) + for add_prefix_space in [False, True]: + prefix = " " if add_prefix_space else "" + prompt = prefix + bad_word.lstrip() + prompt_token_ids = tokenizer.encode(text=prompt, + add_special_tokens=False) + + # If no space at the beginning + # or if prefix space produces a new word token + if (not add_prefix_space) or ( + add_prefix_space and prompt_token_ids[0] + != self._bad_words_token_ids[-1][0] + and len(prompt_token_ids) == len( + self._bad_words_token_ids[-1])): + self._bad_words_token_ids.append(prompt_token_ids) + + invalid_token_ids = [ + token_id for bad_words_token_ids in self._bad_words_token_ids + for token_id in bad_words_token_ids + if token_id < 0 or token_id > tokenizer.max_token_id + ] + if len(invalid_token_ids) > 0: + raise ValueError( + f"The model vocabulary size is {tokenizer.max_token_id+1}," + f" but the following tokens" + f" were specified as bad: {invalid_token_ids}." + f" All token id values should be integers satisfying:" + f" 0 <= token_id <= {tokenizer.max_token_id}.") + + @cached_property + def sampling_type(self) -> SamplingType: + if self.temperature < _SAMPLING_EPS: + return SamplingType.GREEDY + if self.seed is not None: + return SamplingType.RANDOM_SEED + return SamplingType.RANDOM + + @property + def all_stop_token_ids(self) -> set[int]: + return self._all_stop_token_ids + + @property + def bad_words_token_ids(self) -> Optional[list[list[int]]]: + # For internal use only. Backward compatibility not guaranteed + return self._bad_words_token_ids + + def clone(self) -> "SamplingParams": + """Deep copy, but maybe not the LogitsProcessor objects. + + LogitsProcessor objects may contain an arbitrary, nontrivial amount of + data that is expensive to copy. 
However, if not copied, the processor + needs to support parallel decoding for multiple sequences + See https://github.com/vllm-project/vllm/issues/3087 + """ + + logit_processor_refs = None if self.logits_processors is None else { + id(lp): lp.clone() if hasattr(lp, 'clone') else lp + for lp in self.logits_processors + } + return copy.deepcopy(self, memo=logit_processor_refs) + + def __repr__(self) -> str: + return ( + f"SamplingParams(n={self.n}, " + f"presence_penalty={self.presence_penalty}, " + f"frequency_penalty={self.frequency_penalty}, " + f"repetition_penalty={self.repetition_penalty}, " + f"temperature={self.temperature}, " + f"top_p={self.top_p}, " + f"top_k={self.top_k}, " + f"min_p={self.min_p}, " + f"seed={self.seed}, " + f"stop={self.stop}, " + f"stop_token_ids={self.stop_token_ids}, " + f"bad_words={self.bad_words}, " + f"include_stop_str_in_output={self.include_stop_str_in_output}, " + f"ignore_eos={self.ignore_eos}, " + f"max_tokens={self.max_tokens}, " + f"min_tokens={self.min_tokens}, " + f"logprobs={self.logprobs}, " + f"prompt_logprobs={self.prompt_logprobs}, " + f"skip_special_tokens={self.skip_special_tokens}, " + "spaces_between_special_tokens=" + f"{self.spaces_between_special_tokens}, " + f"truncate_prompt_tokens={self.truncate_prompt_tokens}, " + f"structured_outputs={self.structured_outputs}, " + f"extra_args={self.extra_args})") +``` ``` - -## 解决方案二 - -升级vllm的版本到最新稳定版,官方已做出大量优化和改进。 ## 崩溃日志片段