更新 202510_定时监控vLLM进程.md
This commit is contained in:
parent
be520ea533
commit
1ce8d10bc1
@ -15,21 +15,33 @@ vim /hook/timer_bash.sh
|
|||||||
```
|
```
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# 定义 PID 文件路径
|
# 目标进程的 PID 文件路径
|
||||||
TARGET_PID_FILE="/hook/gpt-oss-120b.pid"
|
TARGET_PID_FILE="/hook/gpt-oss-120b.pid"
|
||||||
|
|
||||||
|
# tshark_bash 的 PID 文件路径
|
||||||
TSARK_PID_FILE="/hook/tshark_bash.pid"
|
TSARK_PID_FILE="/hook/tshark_bash.pid"
|
||||||
|
|
||||||
# 无限循环,每隔 5 分钟(300 秒)检查一次
|
# tshark_bash 脚本路径
|
||||||
|
TSARK_SCRIPT_PATH="/hook/tshark_bash.sh"
|
||||||
|
|
||||||
|
# tshark_bash 日志文件路径
|
||||||
|
TSARK_LOG_FILE="/hook/tshark_bash.log"
|
||||||
|
|
||||||
|
# shark.log 文件路径
|
||||||
|
SHARK_LOG_FILE="/hook/shark.log"
|
||||||
|
|
||||||
|
# 临时目录下要删除的 pcapng 文件路径模式
|
||||||
|
PCAPNG_FILES_PATTERN="/tmp/*.pcapng"
|
||||||
|
|
||||||
|
# 5分钟检查一次 vllm 进程是否存活
|
||||||
|
# 若 vllm 进程存活,清理 tshark 的临时文件,然后重启 tshark 脚本继续抓包
|
||||||
|
# 若 vllm 进程已死亡则 kill tshark 的进程
|
||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
# 检查目标 PID 文件是否存在
|
|
||||||
if [[ -f "$TARGET_PID_FILE" ]]; then
|
if [[ -f "$TARGET_PID_FILE" ]]; then
|
||||||
TARGET_PID=$(cat "$TARGET_PID_FILE" 2>/dev/null)
|
TARGET_PID=$(cat "$TARGET_PID_FILE" 2>/dev/null)
|
||||||
|
|
||||||
# 检查读取到的 PID 是否为数字且进程是否存在
|
|
||||||
if [[ "$TARGET_PID" =~ ^[0-9]+$ ]] && ps -p "$TARGET_PID" > /dev/null 2>&1; then
|
if [[ "$TARGET_PID" =~ ^[0-9]+$ ]] && ps -p "$TARGET_PID" > /dev/null 2>&1; then
|
||||||
echo "$(date): 目标进程 (PID: $TARGET_PID) 存活,执行相应操作..."
|
echo "$(date): 目标进程 (PID: $TARGET_PID) 存活,执行相应操作..."
|
||||||
|
|
||||||
# 1. 杀掉 tshark_bash 进程
|
|
||||||
if [[ -f "$TSARK_PID_FILE" ]]; then
|
if [[ -f "$TSARK_PID_FILE" ]]; then
|
||||||
TSHARK_PID=$(cat "$TSARK_PID_FILE" 2>/dev/null)
|
TSHARK_PID=$(cat "$TSARK_PID_FILE" 2>/dev/null)
|
||||||
if [[ "$TSHARK_PID" =~ ^[0-9]+$ ]]; then
|
if [[ "$TSHARK_PID" =~ ^[0-9]+$ ]]; then
|
||||||
@ -42,25 +54,22 @@ while true; do
|
|||||||
echo "tshark_bash.pid 文件不存在,跳过 kill 操作"
|
echo "tshark_bash.pid 文件不存在,跳过 kill 操作"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 2. 删除 shark.log
|
if [[ -f "$SHARK_LOG_FILE" ]]; then
|
||||||
if [[ -f "/hook/shark.log" ]]; then
|
rm -rf "$SHARK_LOG_FILE"
|
||||||
rm -rf "/hook/shark.log"
|
echo "已删除 $SHARK_LOG_FILE"
|
||||||
echo "已删除 /hook/shark.log"
|
|
||||||
else
|
else
|
||||||
echo "/hook/shark.log 文件不存在,跳过删除"
|
echo "$SHARK_LOG_FILE 文件不存在,跳过删除"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 3. 删除 /tmp/*.pcapng
|
if ls $PCAPNG_FILES_PATTERN >/dev/null 2>&1; then
|
||||||
if ls /tmp/*.pcapng >/dev/null 2>&1; then
|
rm -rf $PCAPNG_FILES_PATTERN
|
||||||
rm -rf /tmp/*.pcapng
|
echo "已删除 $PCAPNG_FILES_PATTERN 文件"
|
||||||
echo "已删除 /tmp/*.pcapng 文件"
|
|
||||||
else
|
else
|
||||||
echo "没有找到 /tmp/*.pcapng 文件,跳过删除"
|
echo "没有找到 $PCAPNG_FILES_PATTERN 文件,跳过删除"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 4. 启动新的 tshark_bash.sh 并记录 PID
|
echo "重启 tshark_bash.sh..."
|
||||||
echo "正在启动新的 tshark_bash.sh..."
|
sudo nohup bash "$TSARK_SCRIPT_PATH" >> "$TSARK_LOG_FILE" 2>&1 &
|
||||||
sudo nohup bash /hook/tshark_bash.sh >> /hook/tshark_bash.log 2>&1 &
|
|
||||||
NEW_TSHARK_PID=$!
|
NEW_TSHARK_PID=$!
|
||||||
echo "$NEW_TSHARK_PID" > "$TSARK_PID_FILE"
|
echo "$NEW_TSHARK_PID" > "$TSARK_PID_FILE"
|
||||||
echo "已启动 tshark_bash.sh,新 PID: $NEW_TSHARK_PID,已写入 $TSARK_PID_FILE"
|
echo "已启动 tshark_bash.sh,新 PID: $NEW_TSHARK_PID,已写入 $TSARK_PID_FILE"
|
||||||
@ -68,7 +77,6 @@ while true; do
|
|||||||
else
|
else
|
||||||
echo "$(date): 目标进程 (PID: ${TARGET_PID:-未知}) 不存在或已挂掉,执行清理操作..."
|
echo "$(date): 目标进程 (PID: ${TARGET_PID:-未知}) 不存在或已挂掉,执行清理操作..."
|
||||||
|
|
||||||
# 1. 杀掉 tshark_bash 进程
|
|
||||||
if [[ -f "$TSARK_PID_FILE" ]]; then
|
if [[ -f "$TSARK_PID_FILE" ]]; then
|
||||||
TSHARK_PID=$(cat "$TSARK_PID_FILE" 2>/dev/null)
|
TSHARK_PID=$(cat "$TSARK_PID_FILE" 2>/dev/null)
|
||||||
if [[ "$TSHARK_PID" =~ ^[0-9]+$ ]]; then
|
if [[ "$TSHARK_PID" =~ ^[0-9]+$ ]]; then
|
||||||
@ -81,12 +89,11 @@ while true; do
|
|||||||
echo "tshark_bash.pid 文件不存在,跳过 kill 操作"
|
echo "tshark_bash.pid 文件不存在,跳过 kill 操作"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 2. 删除 /tmp/*.pcapng
|
if ls $PCAPNG_FILES_PATTERN >/dev/null 2>&1; then
|
||||||
if ls /tmp/*.pcapng >/dev/null 2>&1; then
|
rm -rf $PCAPNG_FILES_PATTERN
|
||||||
rm -rf /tmp/*.pcapng
|
echo "已删除 $PCAPNG_FILES_PATTERN 文件"
|
||||||
echo "已删除 /tmp/*.pcapng 文件"
|
|
||||||
else
|
else
|
||||||
echo "没有找到 /tmp/*.pcapng 文件,跳过删除"
|
echo "没有找到 $PCAPNG_FILES_PATTERN 文件,跳过删除"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "目标进程已挂掉,已执行清理操作。"
|
echo "目标进程已挂掉,已执行清理操作。"
|
||||||
@ -95,7 +102,6 @@ while true; do
|
|||||||
echo "$(date): 目标 PID 文件 $TARGET_PID_FILE 不存在,跳过本次检查。"
|
echo "$(date): 目标 PID 文件 $TARGET_PID_FILE 不存在,跳过本次检查。"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 等待 5 分钟 (300 秒)
|
|
||||||
sleep 300
|
sleep 300
|
||||||
done
|
done
|
||||||
```
|
```
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user