文档
目标
在 Python Flask 微服务中使用 OpenTelemetry SDK 自动采集 Trace,上报到 Jaeger,可视化请求链路和耗时分布。
完整代码
架构
Client → ServiceA(:5000) → ServiceB(:5001)
              ↓ OTLP            ↓ OTLP
         Jaeger (OTLP Collector :4317/4318)
1. 安装依赖
# Install Flask, requests, the OpenTelemetry API and SDK, the OTLP gRPC
# exporter, and the Flask and requests instrumentation packages.
pip install flask requests \
opentelemetry-api \
opentelemetry-sdk \
opentelemetry-exporter-otlp-proto-grpc \
opentelemetry-instrumentation-flask \
opentelemetry-instrumentation-requests
2. ServiceA(Flask + 调用 ServiceB)
# service_a.py
from flask import Flask, jsonify
import requests
import time
import random
# ===== OpenTelemetry 初始化 =====
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.resources import Resource
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.instrumentation.flask import FlaskInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor
# Register a global TracerProvider whose resource names this service.
resource = Resource.create({"service.name": "service-a"})
tracer_provider = TracerProvider(resource=resource)
trace.set_tracer_provider(tracer_provider)

# Ship finished spans in batches over OTLP/gRPC (plaintext) to localhost:4317.
otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True)
span_processor = BatchSpanProcessor(otlp_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)

# Create the Flask app, then instrument it and the `requests` library.
app = Flask(__name__)
FlaskInstrumentor().instrument_app(app)
RequestsInstrumentor().instrument()

# Tracer used by the route handlers for custom spans.
tracer = trace.get_tracer(__name__)
@app.route("/process")
def process():
    """Handle ``/process``: do simulated local work, then call ServiceB.

    Returns:
        A JSON response ``{"service": "A", "result": ...}``. ``result`` is
        ServiceB's JSON body on success, or ``{"error": "<message>"}`` when
        the downstream call fails (connection error, timeout, HTTP 4xx/5xx,
        or a body that is not valid JSON).
    """
    with tracer.start_as_current_span("local-computation") as span:
        span.set_attribute("user.id", 42)
        time.sleep(random.uniform(0.05, 0.15))  # simulate local computation

    # Wrap the downstream call in its own span so its latency is visible.
    with tracer.start_as_current_span("call-service-b") as span:
        span.set_attribute("peer.service", "service-b")
        try:
            resp = requests.get("http://localhost:5001/analyze", timeout=5)
            # Fail on HTTP 4xx/5xx instead of trying to parse an error page.
            resp.raise_for_status()
            result = resp.json()
        except Exception as e:
            # Deliberate best-effort: record the failure on the span and
            # report it in the response body rather than returning a 500.
            span.record_exception(e)
            span.set_status(trace.Status(trace.StatusCode.ERROR, str(e)))
            result = {"error": str(e)}
    return jsonify({"service": "A", "result": result})
if __name__ == "__main__":
    # Run ServiceA on port 5000 with Flask debug mode off.
    app.run(debug=False, port=5000)
3. ServiceB(Flask)
# service_b.py
from flask import Flask, jsonify
import time
import random
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.resources import Resource
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.instrumentation.flask import FlaskInstrumentor
# Register a global TracerProvider whose resource names this service.
resource = Resource.create({"service.name": "service-b"})
trace.set_tracer_provider(TracerProvider(resource=resource))

# Ship finished spans in batches over OTLP/gRPC (plaintext) to localhost:4317.
otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True)
trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(otlp_exporter))

app = Flask(__name__)
# instrument_app() needs the Flask app instance; calling it without one
# raises TypeError at startup and leaves ServiceB uninstrumented.
FlaskInstrumentor().instrument_app(app)

# Tracer used by the route handlers for custom spans.
tracer = trace.get_tracer(__name__)
@app.route("/analyze")
def analyze():
    """ServiceB endpoint: simulate a DB query followed by ML inference.

    Returns:
        A JSON response with ``status`` set to ``"completed"`` and a random
        ``score`` rounded to two decimal places.
    """
    # Phase 1: simulated database query.
    with tracer.start_as_current_span("db-query") as db_span:
        db_span.set_attribute("db.statement", "SELECT * FROM events WHERE id=42")
        query_delay = random.uniform(0.1, 0.3)
        time.sleep(query_delay)

    # Phase 2: simulated model inference.
    with tracer.start_as_current_span("ml-inference") as ml_span:
        ml_span.set_attribute("model.name", "recommender-v3")
        reported_latency_ms = random.randint(200, 500)
        ml_span.set_attribute("model.latency_ms", reported_latency_ms)
        inference_delay = random.uniform(0.2, 0.5)
        time.sleep(inference_delay)

    score = round(random.random(), 2)
    payload = {"status": "completed", "score": score}
    return jsonify(payload)
if __name__ == "__main__":
    # Run ServiceB on port 5001 with Flask debug mode off.
    app.run(debug=False, port=5001)
运行步骤
# 1. Start Jaeger (all-in-one image) with OTLP ingestion enabled;
#    publishes 16686 (Jaeger UI) and 4317/4318 (OTLP).
docker run -d --name jaeger \
-e COLLECTOR_OTLP_ENABLED=true \
-p 16686:16686 -p 4317:4317 -p 4318:4318 \
jaegertracing/all-in-one:1.57
# 2. Start ServiceB in the background
python service_b.py &
# 3. Start ServiceA in the background
python service_a.py &
# 4. Send a request to ServiceA
curl http://localhost:5000/process
# 5. Open the Jaeger UI (`open` is the macOS launcher; on Linux use
#    xdg-open or paste the URL into a browser)
open http://localhost:16686
# 6. In the UI, select Service: service-a, then click Find Traces
预期输出
// curl 响应
{
"service": "A",
"result": {
"status": "completed",
"score": 0.87
}
}
Jaeger UI 中看到的 Trace:
service-a.process
├── local-computation (120ms) ← 自定义 Span
└── call-service-b (450ms) ← 自定义 Span
    └── service-b.analyze
        ├── db-query (250ms) ← 自定义 Span
        └── ml-inference (200ms) ← 自定义 Span
Total: ~570ms
关键点
- `Resource` 设置 `service.name` 是 Jaeger 分组的关键
- 自动插桩(`FlaskInstrumentor`)无需修改业务代码
- `tracer.start_as_current_span` 创建自定义 Span 记录关键步骤
- OTLP `insecure=True` 仅在本地开发使用,生产需配置 TLS
- `BatchSpanProcessor` 批量上报,降低网络开销