mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 05:05:59 +08:00
[Observability][Bugfix] Fix expected an instance of Token, got None error in OpenTelemetry (#18934)
This commit is contained in:
parent
7a62202392
commit
8299614e60
@ -52,7 +52,6 @@ def initialize_extensions(app: DifyApp):
|
||||
ext_mail,
|
||||
ext_migrate,
|
||||
ext_otel,
|
||||
ext_otel_patch,
|
||||
ext_proxy_fix,
|
||||
ext_redis,
|
||||
ext_repositories,
|
||||
@ -85,7 +84,6 @@ def initialize_extensions(app: DifyApp):
|
||||
ext_proxy_fix,
|
||||
ext_blueprints,
|
||||
ext_commands,
|
||||
ext_otel_patch, # Apply patch before initializing OpenTelemetry
|
||||
ext_otel,
|
||||
]
|
||||
for ext in extensions:
|
||||
|
@ -8,35 +8,77 @@ from typing import Union
|
||||
|
||||
from celery.signals import worker_init # type: ignore
|
||||
from flask_login import user_loaded_from_request, user_logged_in # type: ignore
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
from opentelemetry.instrumentation.celery import CeleryInstrumentor
|
||||
from opentelemetry.instrumentation.flask import FlaskInstrumentor
|
||||
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
||||
from opentelemetry.metrics import get_meter, get_meter_provider, set_meter_provider
|
||||
from opentelemetry.propagate import set_global_textmap
|
||||
from opentelemetry.propagators.b3 import B3Format
|
||||
from opentelemetry.propagators.composite import CompositePropagator
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import (
|
||||
BatchSpanProcessor,
|
||||
ConsoleSpanExporter,
|
||||
)
|
||||
from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
|
||||
from opentelemetry.semconv.resource import ResourceAttributes
|
||||
from opentelemetry.trace import Span, get_current_span, get_tracer_provider, set_tracer_provider
|
||||
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
|
||||
from opentelemetry.trace.status import StatusCode
|
||||
|
||||
from configs import dify_config
|
||||
from dify_app import DifyApp
|
||||
|
||||
|
||||
class ExceptionLoggingHandler(logging.Handler):
|
||||
@user_logged_in.connect
|
||||
@user_loaded_from_request.connect
|
||||
def on_user_loaded(_sender, user):
|
||||
if dify_config.ENABLE_OTEL:
|
||||
from opentelemetry.trace import get_current_span
|
||||
|
||||
if user:
|
||||
current_span = get_current_span()
|
||||
if current_span:
|
||||
current_span.set_attribute("service.tenant.id", user.current_tenant_id)
|
||||
current_span.set_attribute("service.user.id", user.id)
|
||||
|
||||
|
||||
def init_app(app: DifyApp):
|
||||
def is_celery_worker():
|
||||
return "celery" in sys.argv[0].lower()
|
||||
|
||||
def instrument_exception_logging():
|
||||
exception_handler = ExceptionLoggingHandler()
|
||||
logging.getLogger().addHandler(exception_handler)
|
||||
|
||||
def init_flask_instrumentor(app: DifyApp):
|
||||
meter = get_meter("http_metrics", version=dify_config.CURRENT_VERSION)
|
||||
_http_response_counter = meter.create_counter(
|
||||
"http.server.response.count", description="Total number of HTTP responses by status code", unit="{response}"
|
||||
)
|
||||
|
||||
def response_hook(span: Span, status: str, response_headers: list):
|
||||
if span and span.is_recording():
|
||||
if status.startswith("2"):
|
||||
span.set_status(StatusCode.OK)
|
||||
else:
|
||||
span.set_status(StatusCode.ERROR, status)
|
||||
|
||||
status = status.split(" ")[0]
|
||||
status_code = int(status)
|
||||
status_class = f"{status_code // 100}xx"
|
||||
_http_response_counter.add(1, {"status_code": status_code, "status_class": status_class})
|
||||
|
||||
instrumentor = FlaskInstrumentor()
|
||||
if dify_config.DEBUG:
|
||||
logging.info("Initializing Flask instrumentor")
|
||||
instrumentor.instrument_app(app, response_hook=response_hook)
|
||||
|
||||
def init_sqlalchemy_instrumentor(app: DifyApp):
|
||||
with app.app_context():
|
||||
engines = list(app.extensions["sqlalchemy"].engines.values())
|
||||
SQLAlchemyInstrumentor().instrument(enable_commenter=True, engines=engines)
|
||||
|
||||
def setup_context_propagation():
|
||||
# Configure propagators
|
||||
set_global_textmap(
|
||||
CompositePropagator(
|
||||
[
|
||||
TraceContextTextMapPropagator(), # W3C trace context
|
||||
B3Format(), # B3 propagation (used by many systems)
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
def shutdown_tracer():
|
||||
provider = trace.get_tracer_provider()
|
||||
if hasattr(provider, "force_flush"):
|
||||
provider.force_flush()
|
||||
|
||||
class ExceptionLoggingHandler(logging.Handler):
|
||||
"""Custom logging handler that creates spans for logging.exception() calls"""
|
||||
|
||||
def emit(self, record):
|
||||
@ -60,19 +102,30 @@ class ExceptionLoggingHandler(logging.Handler):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
from opentelemetry.instrumentation.celery import CeleryInstrumentor
|
||||
from opentelemetry.instrumentation.flask import FlaskInstrumentor
|
||||
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
||||
from opentelemetry.metrics import get_meter, get_meter_provider, set_meter_provider
|
||||
from opentelemetry.propagate import set_global_textmap
|
||||
from opentelemetry.propagators.b3 import B3Format
|
||||
from opentelemetry.propagators.composite import CompositePropagator
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import (
|
||||
BatchSpanProcessor,
|
||||
ConsoleSpanExporter,
|
||||
)
|
||||
from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
|
||||
from opentelemetry.semconv.resource import ResourceAttributes
|
||||
from opentelemetry.trace import Span, get_tracer_provider, set_tracer_provider
|
||||
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
|
||||
from opentelemetry.trace.status import StatusCode
|
||||
|
||||
@user_logged_in.connect
|
||||
@user_loaded_from_request.connect
|
||||
def on_user_loaded(_sender, user):
|
||||
if user:
|
||||
current_span = get_current_span()
|
||||
if current_span:
|
||||
current_span.set_attribute("service.tenant.id", user.current_tenant_id)
|
||||
current_span.set_attribute("service.user.id", user.id)
|
||||
|
||||
|
||||
def init_app(app: DifyApp):
|
||||
if dify_config.ENABLE_OTEL:
|
||||
setup_context_propagation()
|
||||
# Initialize OpenTelemetry
|
||||
# Follow Semantic Convertions 1.32.0 to define resource attributes
|
||||
@ -133,67 +186,19 @@ def init_app(app: DifyApp):
|
||||
atexit.register(shutdown_tracer)
|
||||
|
||||
|
||||
def is_celery_worker():
|
||||
return "celery" in sys.argv[0].lower()
|
||||
|
||||
|
||||
def instrument_exception_logging():
|
||||
exception_handler = ExceptionLoggingHandler()
|
||||
logging.getLogger().addHandler(exception_handler)
|
||||
|
||||
|
||||
def init_flask_instrumentor(app: DifyApp):
|
||||
meter = get_meter("http_metrics", version=dify_config.CURRENT_VERSION)
|
||||
_http_response_counter = meter.create_counter(
|
||||
"http.server.response.count", description="Total number of HTTP responses by status code", unit="{response}"
|
||||
)
|
||||
|
||||
def response_hook(span: Span, status: str, response_headers: list):
|
||||
if span and span.is_recording():
|
||||
if status.startswith("2"):
|
||||
span.set_status(StatusCode.OK)
|
||||
else:
|
||||
span.set_status(StatusCode.ERROR, status)
|
||||
|
||||
status = status.split(" ")[0]
|
||||
status_code = int(status)
|
||||
status_class = f"{status_code // 100}xx"
|
||||
_http_response_counter.add(1, {"status_code": status_code, "status_class": status_class})
|
||||
|
||||
instrumentor = FlaskInstrumentor()
|
||||
if dify_config.DEBUG:
|
||||
logging.info("Initializing Flask instrumentor")
|
||||
instrumentor.instrument_app(app, response_hook=response_hook)
|
||||
|
||||
|
||||
def init_sqlalchemy_instrumentor(app: DifyApp):
|
||||
with app.app_context():
|
||||
engines = list(app.extensions["sqlalchemy"].engines.values())
|
||||
SQLAlchemyInstrumentor().instrument(enable_commenter=True, engines=engines)
|
||||
|
||||
|
||||
def setup_context_propagation():
|
||||
# Configure propagators
|
||||
set_global_textmap(
|
||||
CompositePropagator(
|
||||
[
|
||||
TraceContextTextMapPropagator(), # W3C trace context
|
||||
B3Format(), # B3 propagation (used by many systems)
|
||||
]
|
||||
)
|
||||
)
|
||||
def is_enabled():
|
||||
return dify_config.ENABLE_OTEL
|
||||
|
||||
|
||||
@worker_init.connect(weak=False)
|
||||
def init_celery_worker(*args, **kwargs):
|
||||
if dify_config.ENABLE_OTEL:
|
||||
from opentelemetry.instrumentation.celery import CeleryInstrumentor
|
||||
from opentelemetry.metrics import get_meter_provider
|
||||
from opentelemetry.trace import get_tracer_provider
|
||||
|
||||
tracer_provider = get_tracer_provider()
|
||||
metric_provider = get_meter_provider()
|
||||
if dify_config.DEBUG:
|
||||
logging.info("Initializing OpenTelemetry for Celery worker")
|
||||
CeleryInstrumentor(tracer_provider=tracer_provider, meter_provider=metric_provider).instrument()
|
||||
|
||||
|
||||
def shutdown_tracer():
|
||||
provider = trace.get_tracer_provider()
|
||||
if hasattr(provider, "force_flush"):
|
||||
provider.force_flush()
|
||||
|
@ -1,63 +0,0 @@
|
||||
"""
|
||||
Patch for OpenTelemetry context detach method to handle None tokens gracefully.
|
||||
|
||||
This patch addresses the issue where OpenTelemetry's context.detach() method raises a TypeError
|
||||
when called with a None token. The error occurs in the contextvars_context.py file where it tries
|
||||
to call reset() on a None token.
|
||||
|
||||
Related GitHub issue: https://github.com/langgenius/dify/issues/18496
|
||||
|
||||
Error being fixed:
|
||||
```
|
||||
Traceback (most recent call last):
|
||||
File "opentelemetry/context/__init__.py", line 154, in detach
|
||||
_RUNTIME_CONTEXT.detach(token)
|
||||
File "opentelemetry/context/contextvars_context.py", line 50, in detach
|
||||
self._current_context.reset(token) # type: ignore
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
TypeError: expected an instance of Token, got None
|
||||
```
|
||||
|
||||
Instead of modifying the third-party package directly, this patch monkey-patches the
|
||||
context.detach method to gracefully handle None tokens.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
from opentelemetry import context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Store the original detach method
|
||||
original_detach = context.detach
|
||||
|
||||
|
||||
# Create a patched version that handles None tokens
|
||||
@wraps(original_detach)
|
||||
def patched_detach(token):
|
||||
"""
|
||||
A patched version of context.detach that handles None tokens gracefully.
|
||||
"""
|
||||
if token is None:
|
||||
logger.debug("Attempted to detach a None token, skipping")
|
||||
return
|
||||
|
||||
return original_detach(token)
|
||||
|
||||
|
||||
def is_enabled():
|
||||
"""
|
||||
Check if the extension is enabled.
|
||||
Always enable this patch to prevent errors even when OpenTelemetry is disabled.
|
||||
"""
|
||||
return True
|
||||
|
||||
|
||||
def init_app(app):
|
||||
"""
|
||||
Initialize the OpenTelemetry context patch.
|
||||
"""
|
||||
# Replace the original detach method with our patched version
|
||||
context.detach = patched_detach
|
||||
logger.info("OpenTelemetry context.detach patched to handle None tokens")
|
Loading…
x
Reference in New Issue
Block a user