远程与运维(按官方 Gateway Runbook)
日常运维核心命令
bash
# 状态检查
openclaw gateway status # 基础状态
openclaw gateway status --deep # 深度检查
openclaw status # 整体状态
# 日志查看
openclaw logs --follow # 实时日志
openclaw logs --filter error # 错误日志
openclaw logs --tail 100 # 最近 100 条
openclaw logs --export # 导出日志
# 系统诊断
openclaw doctor # 全面诊断
openclaw health # 健康检查
openclaw channels status --probe # 渠道探测
# 安全审计
openclaw security audit # 基础审计
openclaw security audit --deep # 深度审计
参考:
- https://docs.openclaw.ai/gateway/index.md
- https://docs.openclaw.ai/gateway/troubleshooting.md
- https://docs.openclaw.ai/gateway/remote.md
远程接入方案
方案 1:Tailscale(推荐)
优点: 安全、简单、无需额外配置
bash
# 安装 Tailscale
curl -fsSL https://tailscale.com/install.sh | sh
# 连接到网络
tailscale up
# 获取 Tailscale IP
tailscale ip
# 配置 OpenClaw 使用 Tailscale IP
openclaw config set gateway.host 100.x.y.z
# 重启网关
openclaw gateway restart
方案 2:SSH 隧道
优点: 临时访问、无需额外服务
bash
# 创建 SSH 隧道
ssh -N -L 18789:127.0.0.1:18789 user@remote-host
# 后台运行
ssh -fN -L 18789:127.0.0.1:18789 user@remote-host
# 使用 autossh 持久化
autossh -M 0 -f -N \
-L 18789:127.0.0.1:18789 \
-o ServerAliveInterval=30 \
-o ServerAliveCountMax=3 \
user@remote-host
# 本地访问
openclaw --api-url http://localhost:18789
方案 3:VPN
优点: 企业级安全、支持多服务
txt
# WireGuard 配置示例
# /etc/wireguard/wg0.conf
[Interface]
Address = 10.8.0.2/24
PrivateKey = <client-private-key>
[Peer]
PublicKey = <server-public-key>
Endpoint = <server-ip>:51820
AllowedIPs = 10.8.0.0/24
# 启动 VPN
wg-quick up wg0
# 配置 OpenClaw
openclaw config set gateway.host 10.8.0.2
方案 4:公网暴露(需谨慎)
警告: 仅在完善鉴权与代理策略后使用
yaml
# 暴露公网配置
gateway:
host: 0.0.0.0
port: 18789
# 必须启用认证
auth:
enabled: true
type: token
token: ${SECURE_TOKEN}
# 必须启用 TLS
tls:
enabled: true
cert: /path/to/cert.pem
key: /path/to/key.pem
# 必须启用限流
rate_limit:
enabled: true
requests_per_minute: 30
burst: 5
Day-2 运维节奏
每日检查
bash
#!/bin/bash
# daily-check.sh
#
# Prints a daily health summary: gateway status, channel status, the number
# of error log lines from the last 24 hours, and usage statistics.
# Calls the `openclaw` CLI and `jq`, so both must be on PATH.
echo "=== Daily Health Check ==="
echo "Date: $(date)"
echo ""

# 1. Gateway status
echo "1. Gateway Status:"
openclaw gateway status --json | jq '{status, uptime, version}'

# 2. Channel status
echo ""
echo "2. Channels:"
openclaw channels status --json | jq '.[] | {name, status, latency_ms}'

# 3. Error count
echo ""
echo "3. Errors (last 24h):"
# Capture the log output first: piping straight into `wc -l` would print a
# reassuring 0 even when `openclaw logs` itself failed.
if error_lines=$(openclaw logs --filter error --since 24h); then
  if [[ -z "$error_lines" ]]; then
    echo 0
  else
    printf '%s\n' "$error_lines" | wc -l
  fi
else
  echo "unknown (openclaw logs failed)"
fi

# 4. Usage statistics
echo ""
echo "4. Usage:"
openclaw stats usage --period day --json | jq '{requests, tokens, cost}'
echo ""
echo "=== Check Complete ==="
每周检查
bash
#!/bin/bash
# weekly-check.sh
#
# Weekly health report: full diagnosis, deep security audit summary,
# weekly cost report, and an export of the last 7 days of error logs
# to errors-week.json in the current directory.

# Prints a blank line followed by the given section title.
section() {
  printf '\n%s\n' "$1"
}

# jq filter: total number of audit issues and how many are "critical".
audit_summary='{issues: .issues | length, critical: [.issues[] | select(.severity == "critical")] | length}'

printf '%s\n' "=== Weekly Health Check ===" "Date: $(date)"

section "1. Full Diagnosis:"
openclaw doctor --json | jq '.'

section "2. Security Audit:"
openclaw security audit --deep --json | jq "$audit_summary"

section "3. Cost Report:"
openclaw stats cost --period week --json | jq '.'

section "4. Failure Analysis:"
openclaw logs --filter error --since 7d --export errors-week.json

printf '\n'
echo "=== Check Complete ==="
每月检查
bash
#!/bin/bash
# monthly-check.sh
#
# Monthly review: installed vs. latest released version, configuration
# validation and diff against defaults, monthly cost broken down by model,
# and a listing of the local backup directory.
# Calls the `openclaw` CLI, `curl` and `jq`, so all three must be on PATH.
echo "=== Monthly Review ==="
echo "Date: $(date)"
echo ""

# 1. Version check
echo "1. Version Check:"
openclaw --version
# -f makes curl fail on HTTP errors (e.g. API rate limiting) instead of
# handing an error document to jq; `// empty` suppresses jq's literal "null"
# when tag_name is absent. Either way an empty result is shown as "unknown".
latest=$(curl -fsSL https://api.github.com/repos/openclaw/openclaw/releases/latest \
  | jq -r '.tag_name // empty') || latest=""
echo "Latest: ${latest:-unknown}"

# 2. Configuration audit
echo ""
echo "2. Config Audit:"
openclaw config validate
openclaw config diff --base default

# 3. Cost trend
echo ""
echo "3. Cost Trend:"
openclaw stats cost --period month --by model --json | jq '.'

# 4. Backup check
echo ""
echo "4. Backup Check:"
backup_dir=~/.openclaw/backups/
# An existing but empty directory must also count as "no backups"; a plain
# `ls -la` succeeds there and would hide the problem. Errors from ls are
# deliberately silenced: an unreadable directory is reported the same way.
if [[ -d "$backup_dir" && -n "$(ls -A "$backup_dir" 2>/dev/null)" ]]; then
  ls -la "$backup_dir"
else
  echo "No backups found"
fi
echo ""
echo "=== Review Complete ==="
故障分级处理
P0:网关不可用/全渠道中断
影响: 所有服务不可用
处理:
bash
# P0 triage: work through the steps in order.
# 1. Check right away whether the process is running.
#    pgrep never lists itself, whereas `ps aux | grep openclaw` normally
#    shows its own grep process and can be mistaken for a live gateway.
pgrep -fl openclaw
# 2. Try a restart
openclaw gateway restart
# 3. Inspect the logs
openclaw logs --tail 100
# 4. If it will not start, check the configuration
openclaw config validate
# 5. Roll back if necessary
openclaw config import --input config-backup.yaml
P1:单渠道故障
影响: 部分用户无法使用
处理:
bash
# 1. 检查渠道状态
openclaw channels status --probe
# 2. 查看渠道日志
openclaw logs --filter "channel:<channel-name>"
# 3. 尝试重新配对
openclaw channels pair <channel-name> --force
# 4. 禁用问题渠道
openclaw channels disable <channel-name>
# 5. 通知用户使用备用渠道
P2:单能力故障
影响: 特定功能不可用
处理:
bash
# 1. 检查工具状态
openclaw tools list
# 2. 检查权限配置
openclaw config get tools
# 3. 检查 Provider
openclaw providers test
# 4. 禁用问题工具
openclaw config set tools.deny '["problem-tool"]'
# 5. 通知用户并提供替代方案
监控与告警
关键指标
txt
metrics:
# 可用性
- name: gateway_uptime
type: gauge
alert:
critical: < 99%
- name: channel_availability
type: gauge
alert:
warning: < 95%
critical: < 90%
- name: error_rate
type: rate
alert:
warning: > 1%
critical: > 5%
# 性能
- name: response_time_p95
type: latency
alert:
warning: > 5s
critical: > 10s
- name: request_rate
type: rate
alert:
warning: "> 2x baseline"
# 成本
- name: daily_cost
type: currency
alert:
warning: "> $50"
critical: "> $100"
告警配置
yaml
alerts:
- name: gateway_down
condition: gateway_status != "running"
severity: critical
notify: [admin, ops]
- name: high_error_rate
condition: error_rate > 5%
severity: critical
notify: [admin, ops]
- name: channel_disconnected
condition: channel_status != "active"
severity: warning
notify: [admin]
- name: high_latency
condition: p99_latency > 10s
severity: warning
notify: [admin]
- name: cost_spike
condition: daily_cost > baseline * 2
severity: warning
notify: [admin, pm]
通知渠道
yaml
notifications:
email:
enabled: true
recipients:
- admin@example.com
- ops@example.com
slack:
enabled: true
webhook: ${SLACK_WEBHOOK_URL}
discord:
enabled: true
webhook: ${DISCORD_WEBHOOK_URL}
pagerduty:
enabled: true
service_key: ${PAGERDUTY_SERVICE_KEY}
severity_filter: [critical]