文档
Linux 例程:Shell 脚本——服务器资源监控
目标
写一个实用的 Bash 监控脚本,展示 Linux 管道、重定向、grep/awk/sed 核心技法。
完整脚本
#!/bin/bash
# monitor.sh -- lightweight server resource monitor
#
# Usage: ./monitor.sh [--watch]
# Optional environment overrides: LOG_FILE, ALERT_CPU, ALERT_MEM.
set -euo pipefail # strict mode: exit on error, error on unset variables, fail a pipeline if any stage fails

# ── Configuration ──
# Every setting keeps its original default but may be overridden from the
# environment, e.g. `ALERT_CPU=95 LOG_FILE=/var/log/monitor.log ./monitor.sh`.
# Overriding LOG_FILE is the way to move the log out of the shared /tmp
# directory, where the default name is predictable.
LOG_FILE="${LOG_FILE:-/tmp/monitor_$(date +%Y%m%d).log}" # one log file per day
ALERT_CPU="${ALERT_CPU:-80}" # CPU alert threshold, in percent
ALERT_MEM="${ALERT_MEM:-90}" # memory alert threshold, in percent
# ── 工具函数 ──
# Print the current local time as "YYYY-MM-DD HH:MM:SS" on stdout.
timestamp() {
  # %F is shorthand for %Y-%m-%d and %T for %H:%M:%S.
  date '+%F %T'
}
# Emit one timestamped message on stdout and append the same line to the
# log file.
# Globals:   LOG_FILE (read)
# Arguments: the message; all arguments are joined via "$*"
# Outputs:   "[<timestamp>] <message>" on stdout and appended to $LOG_FILE
log() {
  local message="$*"
  printf '[%s] %s\n' "$(timestamp)" "$message" | tee -a "$LOG_FILE"
}
# ── 监控项 ──
# Print "<idle> <total>" jiffy counters taken from the aggregate "cpu" line
# of a /proc/stat-format file.
# Arguments: $1 - path of the stat file
# Outputs:   one line "<idle> <total>" on stdout
# Returns:   non-zero if the file has no aggregate "cpu " line
_cpu_idle_total() {
  local line
  line=$(grep '^cpu ' "$1") || return
  # Field layout of the line:
  #   cpu user nice system idle iowait irq softirq steal guest guest_nice
  local -a fields
  read -r -a fields <<< "$line"
  # Time spent waiting for I/O is counted as idle, not as CPU work.
  local idle=$(( ${fields[4]:-0} + ${fields[5]:-0} ))
  # Sum user..steal only: guest time is already accounted inside user/nice.
  # Missing trailing fields (older kernels) are treated as 0.
  local total=0 i
  for (( i = 1; i <= 8; i++ )); do
    total=$(( total + ${fields[i]:-0} ))
  done
  printf '%s %s\n' "$idle" "$total"
}

# Measure overall CPU utilisation by sampling the counters twice and
# comparing the deltas (the counters are cumulative, so a single sample
# cannot give a percentage).
# Arguments: $1 - seconds to wait between the two samples (default: 1)
#            $2 - stat file to read (default: /proc/stat)
# Outputs:   integer percentage 0-100 on stdout
# Returns:   non-zero if the stat file cannot be parsed
check_cpu() {
  local interval=${1:-1}
  local stat_file=${2:-/proc/stat}
  local sample idle_before total_before idle_after total_after

  sample=$(_cpu_idle_total "$stat_file") || return
  read -r idle_before total_before <<< "$sample"
  sleep "$interval"
  sample=$(_cpu_idle_total "$stat_file") || return
  read -r idle_after total_after <<< "$sample"

  local elapsed=$(( total_after - total_before ))
  local idle_elapsed=$(( idle_after - idle_before ))
  # No counter movement between samples: report 0 instead of dividing by zero.
  if (( elapsed <= 0 )); then
    echo 0
    return 0
  fi

  local pct=$(( 100 * (elapsed - idle_elapsed) / elapsed ))
  # Clamp: the idle/iowait counters are not guaranteed to be monotonic.
  if (( pct < 0 )); then pct=0; fi
  if (( pct > 100 )); then pct=100; fi
  echo "$pct"
}
# Log memory usage as reported by `free -m` and raise an alert when the
# used percentage reaches the configured threshold.
# Globals:   ALERT_MEM (read)
# Outputs:   one usage line, plus an alert line when over threshold, via log
# Returns:   0 always; an unreadable `free` output is logged, not fatal
check_memory() {
  local fields total used pct
  local int_re='^[0-9]+$'

  # LC_ALL=C keeps the "Mem:" row label untranslated; in other locales the
  # label can differ and the awk pattern would silently match nothing.
  fields=$(LC_ALL=C free -m | awk '/^Mem:/ { print $2, $3 }') || fields=""
  read -r total used <<< "$fields"

  # Guard against missing/non-numeric fields and a zero total, either of
  # which would otherwise abort the script in the arithmetic below.
  if ! [[ "$total" =~ $int_re && "$used" =~ $int_re ]] || (( total == 0 )); then
    log "⚠️ 无法获取内存信息"
    return 0
  fi

  pct=$(( used * 100 / total ))
  log "内存: ${used}MB / ${total}MB (${pct}%)"
  if [ "$pct" -ge "$ALERT_MEM" ]; then
    log "⚠️ 内存使用率过高: ${pct}%"
  fi
}
# Log usage of the filesystems holding / and /home and warn when a
# filesystem is fuller than the threshold.
# Globals:   ALERT_DISK (read, optional) - warning threshold in percent,
#            defaults to 85
# Outputs:   one usage line per distinct mount point, plus a warning line
#            for each one above the threshold, via log
# Returns:   0 even if df reports an error (e.g. /home does not exist)
check_disk() {
  local threshold=${ALERT_DISK:-85}
  local line

  # -P forces one line per filesystem (no wrapping of long device names).
  # df exits non-zero when one of the paths is missing; that must not abort
  # the whole run under `set -e -o pipefail`, so its status is ignored and
  # whatever lines it did print are still reported.
  { df -hP / /home 2> /dev/null || true; } \
    | awk -v limit="$threshold" 'NR > 1 && !seen[$6]++ {
        # seen[] drops the duplicate row printed when / and /home share
        # a filesystem.
        pct = $5
        sub(/%/, "", pct)
        printf "磁盘 %s: %s / %s (%d%%)\n", $6, $3, $2, pct
        if (pct + 0 > limit + 0) print "⚠️ 磁盘空间不足: " $6
      }' \
    | while IFS= read -r line; do
        log "$line"
      done
}
# Log the number of established TCP connections as reported by `ss`.
# Outputs: one line via log
# Returns: 0 always; a missing or failing `ss` is logged, not fatal
check_network() {
  local conns

  if ! command -v ss > /dev/null 2>&1; then
    log "活跃 TCP 连接数: 未知(未找到 ss 命令)"
    return 0
  fi

  # -t only: the reported figure is labelled TCP, so connected UDP sockets
  # (which also show up as established) must not be counted.
  # awk skips the header row and prints 0 when there are no connections.
  if ! conns=$(ss -tn state established | awk 'NR > 1 { n++ } END { print n + 0 }'); then
    log "活跃 TCP 连接数: 未知(ss 执行失败)"
    return 0
  fi

  log "活跃 TCP 连接数: $conns"
}
# Log the five processes using the most CPU, one line each, with the
# command line truncated to 50 characters.
# Outputs: a header line followed by up to five process lines, via log
check_top_processes() {
  log "=== TOP 5 进程(CPU)==="
  local pid cpu mem cmd

  # awk (rather than head/tail) consumes all of ps's output, so ps is never
  # killed by SIGPIPE -- which under `set -o pipefail` would make this
  # function fail and abort the script. NR > 1 drops the header row.
  #
  # `ps aux` columns:
  #   USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
  # so exactly six columns are skipped between %MEM and COMMAND.
  ps aux --sort=-%cpu \
    | awk 'NR > 1 && NR <= 6' \
    | while read -r _ pid cpu mem _ _ _ _ _ _ cmd; do
        log " PID=${pid} CPU=${cpu}% MEM=${mem}% CMD=${cmd:0:50}"
      done
}
# ── 主循环 ──
# Run one full monitoring pass: report CPU usage (alerting at the
# threshold), then run the memory, disk, network and top-process checks,
# bracketed by start/end banner lines.
# Globals: ALERT_CPU (read)
main() {
  log "======== 监控开始 ========"

  local cpu_pct
  cpu_pct=$(check_cpu)
  log "CPU 使用率: ${cpu_pct}%"
  if test "$cpu_pct" -ge "$ALERT_CPU"; then
    log "⚠️ CPU 使用率过高: ${cpu_pct}%"
  fi

  # The remaining checks log their own results; run them in fixed order.
  local check
  for check in check_memory check_disk check_network check_top_processes; do
    "$check"
  done

  log "======== 监控结束 ========"
}
# Entry point: when the first argument is --watch, clear the screen and
# re-run the report every 5 seconds until interrupted; otherwise run a
# single pass.
case "${1:-}" in
  --watch)
    while :; do
      clear
      main
      sleep 5
    done
    ;;
  *)
    main
    ;;
esac
运行步骤
chmod +x monitor.sh
./monitor.sh # 执行一次
./monitor.sh --watch # 持续监控
预期输出(示例;/home 磁盘行与 TOP 5 进程列表从略)
[2024-01-15 14:30:00] ======== 监控开始 ========
[2024-01-15 14:30:01] CPU 使用率: 23%
[2024-01-15 14:30:01] 内存: 2048MB / 8192MB (25%)
[2024-01-15 14:30:01] 磁盘 /: 45G / 100G (45%)
[2024-01-15 14:30:01] 活跃 TCP 连接数: 12
[2024-01-15 14:30:01] ======== 监控结束 ========
关键 Shell 技法
| 技法 | 示例 |
|---|---|
| 管道 | `ps aux --sort=-%cpu \| head -6` |
| 重定向 | echo log >> file.log 2>&1 |
| 命令替换 | $(date) |
| Here String | <<< "$variable" |
| 条件判断 | [ "$a" -ge 80 ] |
| 循环读取 | while read -r line; do ... done |