[{"data":1,"prerenderedAt":2813},["ShallowReactive",2],{"\u002Fguides\u002Fopentelemetry-rag-observability-navigation":3,"\u002Fguides\u002Fopentelemetry-rag-observability":208},[4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84,88,92,96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204],{"title":5,"path":6,"stem":7},"Guides","\u002Fguides","guides\u002Findex",{"title":9,"path":10,"stem":11},"OpenTelemetry Beego monitoring [otelbeego]","\u002Fguides\u002Fopentelemetry-beego","guides\u002Fopentelemetry-beego",{"title":13,"path":14,"stem":15},"OpenTelemetry Celery Instrumentation Guide","\u002Fguides\u002Fopentelemetry-celery","guides\u002Fopentelemetry-celery",{"title":17,"path":18,"stem":19},"Instrumenting Go database\u002Fsql with OpenTelemetry [otelsql]","\u002Fguides\u002Fopentelemetry-database-sql","guides\u002Fopentelemetry-database-sql",{"title":21,"path":22,"stem":23},"OpenTelemetry Django: Traces, Metrics and Database Monitoring","\u002Fguides\u002Fopentelemetry-django","guides\u002Fopentelemetry-django",{"title":25,"path":26,"stem":27},"OpenTelemetry Docker Monitoring with Collector and Docker Stats","\u002Fguides\u002Fopentelemetry-docker","guides\u002Fopentelemetry-docker",{"title":29,"path":30,"stem":31},"OpenTelemetry Echo by Labstack [otelecho]","\u002Fguides\u002Fopentelemetry-echo","guides\u002Fopentelemetry-echo",{"title":33,"path":34,"stem":35},"OpenTelemetry Go Ent monitoring [otelent]","\u002Fguides\u002Fopentelemetry-ent","guides\u002Fopentelemetry-ent",{"title":37,"path":38,"stem":39},"OpenTelemetry Express.js instrumentation","\u002Fguides\u002Fopentelemetry-express","guides\u002Fopentelemetry-express",{"title":41,"path":42,"stem":43},"OpenTelemetry Falcon Instrumentation and Monitoring","\u002Fguides\u002Fopentelemetry-falcon","guides\u002Fopentelemetry-falcon",{"title":45,"path":46,"stem":47},"OpenTelemetry FastAPI Instrumentation and Monitoring","\u002Fguides\u002Fopentelemetry-fastapi","guides\u002Fopentelemetry-fastapi",{"title":49,"path":50,"stem":51},"OpenTelemetry Filelog Receiver","\u002Fguides\u002Fopentelemetry-filelog-receiver","guides\u002Fopentelemetry-filelog-receiver",{"title":53,"path":54,"stem":55},"OpenTelemetry Flask Instrumentation and Monitoring","\u002Fguides\u002Fopentelemetry-flask","guides\u002Fopentelemetry-flask",{"title":57,"path":58,"stem":59},"OpenTelemetry Gin Monitoring [otelgin]","\u002Fguides\u002Fopentelemetry-gin","guides\u002Fopentelemetry-gin",{"title":61,"path":62,"stem":63},"OpenTelemetry Golang gRPC monitoring [otelgrpc]","\u002Fguides\u002Fopentelemetry-go-grpc","guides\u002Fopentelemetry-go-grpc",{"title":65,"path":66,"stem":67},"OpenTelemetry Go AWS Lambda Instrumentation","\u002Fguides\u002Fopentelemetry-go-lambda","guides\u002Fopentelemetry-go-lambda",{"title":69,"path":70,"stem":71},"OpenTelemetry Go-Zero monitoring [otelzero]","\u002Fguides\u002Fopentelemetry-go-zero","guides\u002Fopentelemetry-go-zero",{"title":73,"path":74,"stem":75},"OpenTelemetry Gorilla Mux monitoring [otelmux]","\u002Fguides\u002Fopentelemetry-gorilla-mux","guides\u002Fopentelemetry-gorilla-mux",{"title":77,"path":78,"stem":79},"OpenTelemetry GORM monitoring [otelgorm]","\u002Fguides\u002Fopentelemetry-gorm","guides\u002Fopentelemetry-gorm",{"title":81,"path":82,"stem":83},"OpenTelemetry HTTPcheck Receiver","\u002Fguides\u002Fopentelemetry-httpcheck","guides\u002Fopentelemetry-httpcheck",{"title":85,"path":86,"stem":87},"OpenTelemetry Kubernetes Events Receiver 
[k8seventsreceiver]","\u002Fguides\u002Fopentelemetry-k8seventsreceiver","guides\u002Fopentelemetry-k8seventsreceiver",{"title":89,"path":90,"stem":91},"Kafka Monitoring with OpenTelemetry Collector","\u002Fguides\u002Fopentelemetry-kafka","guides\u002Fopentelemetry-kafka",{"title":93,"path":94,"stem":95},"OpenTelemetry Integration for Laravel: Full Guide","\u002Fguides\u002Fopentelemetry-laravel","guides\u002Fopentelemetry-laravel",{"title":97,"path":98,"stem":99},"OpenTelemetry Log4j logs [Java]","\u002Fguides\u002Fopentelemetry-log4j","guides\u002Fopentelemetry-log4j",{"title":101,"path":102,"stem":103},"OpenTelemetry Logback logging [Java]","\u002Fguides\u002Fopentelemetry-logback","guides\u002Fopentelemetry-logback",{"title":105,"path":106,"stem":107},"OpenTelemetry Logrus logs [otellogrus]","\u002Fguides\u002Fopentelemetry-logrus","guides\u002Fopentelemetry-logrus",{"title":109,"path":110,"stem":111},"OpenTelemetry MySQL Monitoring [step by step]","\u002Fguides\u002Fopentelemetry-mysql","guides\u002Fopentelemetry-mysql",{"title":113,"path":114,"stem":115},"OpenTelemetry NestJS Instrumentation","\u002Fguides\u002Fopentelemetry-nestjs","guides\u002Fopentelemetry-nestjs",{"title":117,"path":118,"stem":119},"OpenTelemetry Go net\u002Fhttp Instrumentation [otelhttp]","\u002Fguides\u002Fopentelemetry-net-http","guides\u002Fopentelemetry-net-http",{"title":121,"path":122,"stem":123},"OpenTelemetry Next.js: Tracing for App Router and Pages Router","\u002Fguides\u002Fopentelemetry-nextjs","guides\u002Fopentelemetry-nextjs",{"title":125,"path":126,"stem":127},"OpenTelemetry NGINX Instrumentation","\u002Fguides\u002Fopentelemetry-nginx","guides\u002Fopentelemetry-nginx",{"title":129,"path":130,"stem":131},"OpenTelemetry Node.js AWS Lambda","\u002Fguides\u002Fopentelemetry-node-lambda","guides\u002Fopentelemetry-node-lambda",{"title":133,"path":134,"stem":135},"OpenTelemetry OpenAI Instrumentation","\u002Fguides\u002Fopentelemetry-openai","guides\u002Fopentelemetry-openai",{"title":137,"path":138,"stem":139},"OpenTelemetry Phoenix monitoring","\u002Fguides\u002Fopentelemetry-phoenix","guides\u002Fopentelemetry-phoenix",{"title":141,"path":142,"stem":143},"PHP-FPM Monitoring with OpenTelemetry: Metrics, Alerts, and Dashboards","\u002Fguides\u002Fopentelemetry-php-fpm","guides\u002Fopentelemetry-php-fpm",{"title":145,"path":146,"stem":147},"OpenTelemetry PostgreSQL Monitoring","\u002Fguides\u002Fopentelemetry-postgresql","guides\u002Fopentelemetry-postgresql",{"title":149,"path":150,"stem":151},"OpenTelemetry Pyramid: Instrumentation and Monitoring Guide","\u002Fguides\u002Fopentelemetry-pyramid","guides\u002Fopentelemetry-pyramid",{"title":153,"path":154,"stem":155},"OpenTelemetry Quarkus Instrumentation [Java]","\u002Fguides\u002Fopentelemetry-quarkus","guides\u002Fopentelemetry-quarkus",{"title":157,"path":158,"stem":159},"OpenTelemetry RabbitMQ Monitoring Guide","\u002Fguides\u002Fopentelemetry-rabbitmq","guides\u002Fopentelemetry-rabbitmq",{"title":161,"path":162,"stem":163},"RAG Pipeline Observability with OpenTelemetry","\u002Fguides\u002Fopentelemetry-rag-observability","guides\u002Fopentelemetry-rag-observability",{"title":165,"path":166,"stem":167},"Ruby on Rails Application Monitoring with OpenTelemetry","\u002Fguides\u002Fopentelemetry-rails","guides\u002Fopentelemetry-rails",{"title":169,"path":170,"stem":171},"Monitor Redis with OpenTelemetry Collector","\u002Fguides\u002Fopentelemetry-redis","guides\u002Fopentelemetry-redis",{"title":173,"path":174,"stem":175},"OpenTelemetry Sinatra 
monitoring","\u002Fguides\u002Fopentelemetry-sinatra","guides\u002Fopentelemetry-sinatra",{"title":177,"path":178,"stem":179},"OpenTelemetry Slim Framework: Instrumentation and Monitoring Guide","\u002Fguides\u002Fopentelemetry-slim","guides\u002Fopentelemetry-slim",{"title":181,"path":182,"stem":183},"OpenTelemetry Slog [otelslog]: Golang Bridge Setup & Examples","\u002Fguides\u002Fopentelemetry-slog","guides\u002Fopentelemetry-slog",{"title":185,"path":186,"stem":187},"OpenTelemetry Spring Boot: Java Agent, Starter, and Manual Instrumentation","\u002Fguides\u002Fopentelemetry-spring-boot","guides\u002Fopentelemetry-spring-boot",{"title":189,"path":190,"stem":191},"OpenTelemetry SQLAlchemy monitoring","\u002Fguides\u002Fopentelemetry-sqlalchemy","guides\u002Fopentelemetry-sqlalchemy",{"title":193,"path":194,"stem":195},"OpenTelemetry Integration for Symfony: Full Guide","\u002Fguides\u002Fopentelemetry-symfony","guides\u002Fopentelemetry-symfony",{"title":197,"path":198,"stem":199},"OpenTelemetry Syslog Receiver","\u002Fguides\u002Fopentelemetry-syslog-receiver","guides\u002Fopentelemetry-syslog-receiver",{"title":201,"path":202,"stem":203},"OpenTelemetry Tomcat: Instrumentation and Monitoring Guide","\u002Fguides\u002Fopentelemetry-tomcat","guides\u002Fopentelemetry-tomcat",{"title":205,"path":206,"stem":207},"OpenTelemetry Zap [otelzap]: Golang Logging Bridge Setup & Examples","\u002Fguides\u002Fopentelemetry-zap","guides\u002Fopentelemetry-zap",{"page":209,"surround":2808},{"id":210,"title":161,"author":211,"author_site":212,"body":213,"date":2796,"description":2797,"extension":2798,"image":2799,"meta":2800,"navigation":827,"path":162,"seo":2806,"stem":163,"__hash__":2807},"guides\u002Fguides\u002Fopentelemetry-rag-observability.md","abandurchin",null,{"type":214,"value":215,"toc":2779},"minimark",[216,220,223,231,244,249,252,259,265,271,277,291,294,298,305,333,338,406,410,483,489,493,537,541,584,588,662,679,683,686,708,711,739,1024,1063,1069,1093,1104,1108,1115,1131,1465,1475,1498,1506,1510,1513,2404,2420,2424,2432,2495,2503,2538,2543,2547,2555,2624,2636,2640,2645,2648,2653,2676,2681,2693,2698,2712,2717,2734,2739,2746,2750,2758,2769,2775],[217,218,219],"p",{},"RAG pipeline observability is the practice of tracing every stage of a Retrieval-Augmented Generation system — from query embedding to vector search to LLM generation — so you can diagnose failures, measure latency, and track quality in production. Without it, a pipeline can return empty or truncated answers and produce no errors, no logs, and no signal that anything went wrong.",[217,221,222],{},"Standard APM tools cover HTTP requests, database queries, and service latency. They are not designed for the multi-stage data flow inside a RAG pipeline: a question passes through an embedding model, a vector database, an optional reranker, a context assembly step, and finally an LLM. Each stage can fail independently. Standard APM sees only the outer request; it cannot tell you which stage took 800ms or why the LLM received 0 retrieved chunks. For that you need distributed tracing with span-level attributes specific to retrieval-augmented generation.",[217,224,225,226,230],{},"For ",[227,228,229],"a",{"href":134},"OpenAI instrumentation with OpenTelemetry",", the story is simpler — one model, one API call. 
RAG pipelines add retrieval complexity that requires a different instrumentation strategy.",[217,232,233,234,238,239,243],{},"If you're new to LLM instrumentation, start with our ",[227,235,237],{"href":236},"\u002Fblog\u002Fopentelemetry-ai-systems","OpenTelemetry for AI systems guide"," first — this guide assumes you're already familiar with ",[240,241,242],"code",{},"gen_ai.*"," spans and focuses specifically on the retrieval stages.",[245,246,248],"h2",{"id":247},"why-rag-pipelines-fail-silently","Why RAG Pipelines Fail Silently",[217,250,251],{},"RAG systems have five stages where failures produce no exception and no non-200 HTTP status:",[217,253,254,258],{},[255,256,257],"strong",{},"1. Embedding."," The embedding model changes between versions or is swapped for a cheaper alternative. Semantic similarity scores drift. Retrieval quality degrades slowly. No error fires.",[217,260,261,264],{},[255,262,263],{},"2. Vector search."," The query returns zero results because the index is stale, the similarity threshold is too strict, or the query was phrased unusually. The pipeline continues with an empty context and the LLM either hallucinates or returns \"I don't know.\"",[217,266,267,270],{},[255,268,269],{},"3. Reranking."," The reranker scores all candidates below a cutoff. The result set collapses from ten candidates to zero. No exception, no warning.",[217,272,273,276],{},[255,274,275],{},"4. Context assembly."," Retrieved chunks exceed the model's context window. The assembly step silently truncates. The LLM receives an incomplete prompt and generates a partial or misleading answer.",[217,278,279,282,283,286,287,290],{},[255,280,281],{},"5. LLM generation."," The response contains ",[240,284,285],{},"finish_reason = \"length\""," — the model hit the ",[240,288,289],{},"max_tokens"," limit before completing its answer. Most applications discard this field and return the truncated text to the user as if it were complete.",[217,292,293],{},"None of these stages raise exceptions by default. Without explicit instrumentation and attribute capture at each step, these bugs are invisible in production.",[245,295,297],{"id":296},"the-rag-trace-what-each-span-should-capture","The RAG Trace: What Each Span Should Capture",[217,299,300,301,304],{},"A well-instrumented RAG pipeline produces a trace with five child spans inside a parent ",[240,302,303],{},"rag.query"," span. Below are the attributes each span should carry.",[306,307,308],"blockquote",{},[217,309,310,313,314,316,317,320,321,324,325,328,329,332],{},[255,311,312],{},"Note on standards:"," The OTel GenAI semantic conventions (including ",[240,315,242],{}," attributes) carry ",[255,318,319],{},"Development"," stability — they may change in future releases. To opt into experimental GenAI attributes before they stabilize, set ",[240,322,323],{},"OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental",". The ",[240,326,327],{},"rag.*"," attributes used in this guide are ",[255,330,331],{},"custom attributes"," chosen for clarity; they are not part of any official specification. 
### Span 1: Query Embedding

| Attribute | Example | Purpose |
|---|---|---|
| `rag.query.text` | `"What is chunking?"` | The raw user query |
| `rag.embedding.model` | `text-embedding-3-small` | Model used for embedding |
| `rag.embedding.duration_ms` | `42` | Latency for the embed call |

### Span 2: Vector Search

| Attribute | Example | Purpose |
|---|---|---|
| `rag.retrieval.query` | `"What is chunking?"` | Query sent to the vector DB |
| `rag.retrieval.top_k` | `5` | Number of results requested |
| `rag.retrieval.results_count` | `0` | Number of results returned |
| `rag.retrieval.empty_result` | `true` | Boolean flag for empty retrieval |

The `rag.retrieval.empty_result` boolean is the most important attribute in the entire trace. It lets you filter for all requests where retrieval failed silently.

### Span 3: Reranking (optional)

| Attribute | Example | Purpose |
|---|---|---|
| `rag.reranking.model` | `cohere-rerank-v3` | Reranker model |
| `rag.reranking.scores` | `[0.91, 0.74, 0.52]` | Score list for retrieved chunks |

### Span 4: Context Assembly

| Attribute | Example | Purpose |
|---|---|---|
| `rag.context.token_count` | `3840` | Total tokens in assembled context |
| `rag.context.truncated` | `true` | Whether context was cut |

### Span 5: LLM Generation

| Attribute | Example | Purpose |
|---|---|---|
| `gen_ai.request.model` | `gpt-4o` | Requested model |
| `gen_ai.usage.input_tokens` | `4096` | Tokens in the prompt |
| `gen_ai.usage.output_tokens` | `128` | Tokens in the response |
| `gen_ai.response.finish_reasons` | `["length"]` | Why generation stopped (string array) |

When `gen_ai.response.finish_reasons` contains `"length"`, the LLM hit `max_tokens` and the answer is incomplete. This value correlates directly with `rag.context.truncated = true` upstream.
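The optional reranking stage (Span 3) is the one stage the full manual example later in this guide does not cover. A minimal sketch of how it could be instrumented, assuming a hypothetical `your_reranker.score()` client that returns one relevance score per chunk; `rag.reranking.empty_result` is an extra custom attribute in the same spirit as the ones above, not part of the tables:

```python
from opentelemetry import trace

tracer = trace.get_tracer("rag.pipeline")

def rerank(query: str, chunks: list[dict], cutoff: float = 0.5) -> list[dict]:
    # your_reranker is a placeholder for whatever reranking client you use
    with tracer.start_as_current_span("rag.rerank") as span:
        scores = your_reranker.score(query, [c["text"] for c in chunks])
        span.set_attribute("rag.reranking.model", "cohere-rerank-v3")
        span.set_attribute("rag.reranking.scores", scores)
        kept = [c for c, s in zip(chunks, scores) if s >= cutoff]
        # Failure mode 3: every candidate scored below the cutoff.
        # Record the collapse explicitly so it is visible in the trace.
        span.set_attribute("rag.reranking.empty_result", len(kept) == 0)
        return kept
```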
## Auto-Instrumentation: LlamaIndex + OpenTelemetry

LlamaIndex has first-class OpenTelemetry support. Two packages are available:

- `openinference-instrumentation-llama-index` — maintained by Arize/OpenInference, [recommended in the official LlamaIndex docs](https://docs.llamaindex.ai/en/stable/module_guides/observability/)
- `opentelemetry-instrumentation-llamaindex` — from Traceloop/OpenLLMetry (Traceloop was acquired by ServiceNow in March 2026; the library continues under Apache 2.0)

Both export spans through the standard OpenTelemetry SDK and OTLP exporters. The examples below use the Arize package:

```bash
pip install openinference-instrumentation-llama-index opentelemetry-exporter-otlp
```

```python
import os
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor

def setup_tracing():
    exporter = OTLPSpanExporter(
        endpoint=os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"),
    )
    provider = TracerProvider()
    provider.add_span_processor(BatchSpanProcessor(exporter))
    trace.set_tracer_provider(provider)

    # Instruments LLM calls, retrievers, and embeddings automatically
    LlamaIndexInstrumentor().instrument()

setup_tracing()

# Your existing LlamaIndex code works unchanged
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

response = query_engine.query("What is context window truncation?")
```

`LlamaIndexInstrumentor().instrument()` is a single call. After it runs, every LlamaIndex operation — `VectorStoreRetriever`, `OpenAIEmbedding`, `OpenAI` LLM, and the query engine itself — produces OpenInference-compatible spans with attributes like `retrieval.documents`, `document.score`, `embedding.model_name`, `llm.token_count.prompt`, and `llm.token_count.completion`. Note that OpenInference uses its own attribute names — not the `rag.*` custom attributes described earlier in this guide. The `rag.*` attributes are for manual instrumentation of custom pipelines; if you use auto-instrumentation, filter and alert on the OpenInference attribute names instead.

**PII control:** To prevent query text and retrieved documents from appearing in traces, set:

```bash
OPENINFERENCE_HIDE_INPUTS=true
OPENINFERENCE_HIDE_OUTPUTS=true
```

These are the OpenInference privacy controls. You can also set `OPENINFERENCE_HIDE_EMBEDDING_VECTORS=true` and `OPENINFERENCE_HIDE_INPUT_TEXT=true` for more granular control. This suppresses input/output content in span attributes while preserving token counts, latency, and structural attributes.

## Auto-Instrumentation: LangChain + OpenTelemetry

LangChain uses a similar pattern via the OpenLLMetry instrumentation package. Traceloop, the maintainer of `opentelemetry-instrumentation-langchain`, was acquired by ServiceNow in March 2026; the library continues under Apache 2.0.

```bash
pip install opentelemetry-instrumentation-langchain opentelemetry-exporter-otlp
```

```python
import os
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.langchain import LangChainInstrumentor

def setup_tracing():
    exporter = OTLPSpanExporter(
        endpoint=os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"),
    )
    provider = TracerProvider()
    provider.add_span_processor(BatchSpanProcessor(exporter))
    trace.set_tracer_provider(provider)

    LangChainInstrumentor().instrument()

setup_tracing()

from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = FAISS.load_local("./faiss_index", embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

llm = ChatOpenAI(model="gpt-4o", temperature=0)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

result = qa_chain.invoke({"query": "Explain vector search latency."})
```

The `LangChainInstrumentor` wraps the `RetrievalQA` chain and produces a span tree that shows:

- The root chain invocation with overall latency
- A `retriever` child span with the number of documents retrieved
- An `llm` child span with `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens`, and `gen_ai.response.finish_reasons`

The gap between auto-instrumentation and complete observability is `rag.retrieval.empty_result` and `rag.context.truncated` — the LangChain instrumentation does not currently set these. Add them with manual spans: either the lightweight wrapper sketched below, or the full pipeline in the next section.
## Manual Instrumentation for Custom RAG Pipelines

Auto-instrumentation covers the standard LlamaIndex and LangChain call paths. For custom retrievers, custom rerankers, or any logic outside those frameworks, we instrument manually:

```python
import os
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

exporter = OTLPSpanExporter(
    endpoint=os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"),
)
provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(exporter))
trace.set_tracer_provider(provider)

tracer = trace.get_tracer("rag.pipeline")


def embed_query(query: str) -> list[float]:
    # Replace with your actual embedding call
    import time
    start = time.monotonic()
    vector = your_embedding_model.encode(query)
    duration_ms = (time.monotonic() - start) * 1000

    span = trace.get_current_span()
    span.set_attribute("rag.query.text", query)
    span.set_attribute("rag.embedding.model", "text-embedding-3-small")
    span.set_attribute("rag.embedding.duration_ms", round(duration_ms))
    return vector


def retrieve(vector: list[float], top_k: int = 5) -> list[dict]:
    results = your_vector_db.search(vector, top_k=top_k)
    span = trace.get_current_span()
    span.set_attribute("rag.retrieval.top_k", top_k)
    span.set_attribute("rag.retrieval.results_count", len(results))
    span.set_attribute("rag.retrieval.empty_result", len(results) == 0)
    return results


def assemble_context(chunks: list[dict], max_tokens: int = 3000) -> tuple[str, bool]:
    text = "\n\n".join(c["text"] for c in chunks)
    token_count = len(text.split())  # simplified; use a real tokenizer
    truncated = token_count > max_tokens
    if truncated:
        text = " ".join(text.split()[:max_tokens])

    span = trace.get_current_span()
    span.set_attribute("rag.context.token_count", token_count)
    span.set_attribute("rag.context.truncated", truncated)
    return text, truncated


def run_rag_pipeline(query: str) -> str:
    with tracer.start_as_current_span("rag.query") as root_span:
        root_span.set_attribute("rag.query.text", query)

        with tracer.start_as_current_span("rag.embed"):
            vector = embed_query(query)

        with tracer.start_as_current_span("rag.retrieve"):
            chunks = retrieve(vector)

        if not chunks:
            root_span.set_attribute("rag.retrieval.empty_result", True)
            return "No relevant documents found."

        with tracer.start_as_current_span("rag.assemble"):
            context, truncated = assemble_context(chunks)

        with tracer.start_as_current_span("rag.generate"):
            response = your_llm.generate(context=context, query=query)
            span = trace.get_current_span()
            span.set_attribute("gen_ai.request.model", "gpt-4o")
            span.set_attribute("gen_ai.usage.input_tokens", response.usage.prompt_tokens)
            span.set_attribute("gen_ai.usage.output_tokens", response.usage.completion_tokens)
            span.set_attribute("gen_ai.response.finish_reasons", [response.choices[0].finish_reason])

        return response.choices[0].message.content
```

The key instrumentation points are in `retrieve()` — setting `rag.retrieval.empty_result` — and in `assemble_context()` — setting `rag.context.truncated`. These two booleans power the most important production alerts.
## Sending RAG Traces to Uptrace

Uptrace, an [OpenTelemetry-native APM](/opentelemetry/apm), accepts traces via OTLP and indexes all span attributes as queryable fields. The exporter configuration is the same across all three approaches:

```python
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

exporter = OTLPSpanExporter(
    endpoint="https://api.uptrace.dev:4317",
    headers={"uptrace-dsn": "https://<secret>@api.uptrace.dev?grpc=4317"},
)
```

After traces arrive, [distributed tracing in Uptrace](/product/tracing) lets you:

- **Filter by `rag.retrieval.empty_result = true`** to find all requests where retrieval returned nothing
- **Group by `gen_ai.request.model`** to compare token usage and latency across model versions
- **Filter by `gen_ai.response.finish_reasons` containing `"length"`** to find truncated responses
- **Sort by span duration** to identify which pipeline stage is the latency bottleneck

The `rag.*` attributes are stored as custom attributes in Uptrace and are immediately searchable without any schema configuration.
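The examples in this guide omit a service name. In practice, attach resource attributes to the `TracerProvider` so RAG spans group under a recognizable service in the backend. A minimal sketch; the `rag-api` name and environment value are placeholders:

```python
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider

resource = Resource.create({
    "service.name": "rag-api",               # placeholder service name
    "deployment.environment": "production",  # placeholder environment
})
provider = TracerProvider(resource=resource)
```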
Tie them to ",[227,2551,2553],{"href":2552},"\u002Fblog\u002Fsla-slo-monitoring-requirements","SLA\u002FSLO monitoring requirements"," to define acceptable thresholds per environment.",[339,2556,2557,2570],{},[342,2558,2559],{},[345,2560,2561,2564,2567],{},[348,2562,2563],{},"Condition",[348,2565,2566],{},"Suggested threshold",[348,2568,2569],{},"What it means",[358,2571,2572,2585,2602,2613],{},[345,2573,2574,2579,2582],{},[363,2575,2576,2578],{},[240,2577,474],{}," rate",[363,2580,2581],{},"> 5% of requests",[363,2583,2584],{},"Index staleness, query distribution shift",[345,2586,2587,2593,2596],{},[363,2588,2589,667,2591,2578],{},[240,2590,653],{},[240,2592,670],{},[363,2594,2595],{},"> 2% of requests",[363,2597,2598,2599,2601],{},"Context too large, ",[240,2600,289],{}," too low",[345,2603,2604,2607,2610],{},[363,2605,2606],{},"p95 vector search latency",[363,2608,2609],{},"> 500ms",[363,2611,2612],{},"Vector DB performance degradation",[345,2614,2615,2618,2621],{},[363,2616,2617],{},"p95 end-to-end RAG latency",[363,2619,2620],{},"> 3s",[363,2622,2623],{},"Compound latency across all stages",[217,2625,2626,2627,2528,2630,2632,2633,2635],{},"Empty retrieval above 5% typically indicates the vector index is stale or the embedding model was swapped without re-indexing. A ",[240,2628,2629],{},"finish_reasons",[240,2631,670],{}," rate above 2% means users are consistently receiving incomplete answers — investigate ",[240,2634,576],{}," in the same traces to confirm.",[245,2637,2639],{"id":2638},"faq","FAQ",[217,2641,2642],{},[255,2643,2644],{},"What is RAG observability?",[217,2646,2647],{},"RAG observability is the practice of tracing and measuring every stage of a Retrieval-Augmented Generation pipeline — embedding, vector search, reranking, context assembly, and LLM generation. The goal is to make failures visible in production, because most RAG failure modes (empty retrieval, context truncation, incomplete generation) produce no exception and no error log by default.",[217,2649,2650],{},[255,2651,2652],{},"How do I trace a RAG pipeline with OpenTelemetry?",[217,2654,2655,2656,2658,2659,2662,2663,2666,2667,2670,2671,1099,2673,2675],{},"Use ",[240,2657,1028],{}," or ",[240,2660,2661],{},"LangChainInstrumentor().instrument()"," for framework-based pipelines. For custom pipelines, use ",[240,2664,2665],{},"tracer.start_as_current_span()"," to create a span per stage and call ",[240,2668,2669],{},"span.set_attribute()"," to record attributes like ",[240,2672,459],{},[240,2674,576],{},". Export spans via OTLP to any compatible backend.",[217,2677,2678],{},[255,2679,2680],{},"Does LlamaIndex support OpenTelemetry?",[217,2682,2683,2684,2686,2687,2689,2690,2692],{},"Yes. Install ",[240,2685,694],{}," (recommended, maintained by Arize\u002FOpenInference) or ",[240,2688,706],{}," (from OpenLLMetry\u002FServiceNow) and call ",[240,2691,1028],{}," once at startup. Both automatically instrument LLM calls, embedding calls, and vector store retrievers using the OpenInference semantic conventions. Spans are exported via the standard OTLP exporter.",[217,2694,2695],{},[255,2696,2697],{},"Does LangChain support OpenTelemetry tracing?",[217,2699,2683,2700,2702,2703,2705,2706,2708,2709,2711],{},[240,2701,1113],{}," and call ",[240,2704,2661],{},". The instrumentation wraps LangChain chains and produces spans for each component. 
It captures ",[240,2707,242],{}," attributes on LLM calls and retriever metadata, though some RAG-specific attributes like ",[240,2710,474],{}," require manual instrumentation.",[217,2713,2714],{},[255,2715,2716],{},"What attributes should I add to RAG spans?",[217,2718,2719,2720,2722,2723,2725,2726,2722,2728,2730,2731,2733],{},"The most operationally useful attributes are: ",[240,2721,474],{}," (boolean), ",[240,2724,459],{}," (integer), ",[240,2727,576],{},[240,2729,561],{}," (integer), and ",[240,2732,653],{}," (string array). These attributes are sufficient to detect and diagnose the most common silent failures in a RAG pipeline.",[217,2735,2736],{},[255,2737,2738],{},"How is RAG observability different from standard LLM observability?",[217,2740,2741,2745],{},[227,2742,2744],{"href":2743},"\u002Fglossary\u002Fllm-observability","LLM observability"," focuses on a single model interaction: token usage, latency, cost, and finish reason. RAG observability adds the retrieval layer — you need to trace what was retrieved, how much, whether the retrieval was empty, and whether the context was truncated before it reached the model. An LLM call with zero retrieved context looks identical to one with ten chunks unless you instrument the retrieval stages explicitly.",[245,2747,2749],{"id":2748},"conclusion","Conclusion",[217,2751,2752,2753,2528,2755,2757],{},"RAG pipelines fail in ways that standard observability tools cannot detect: empty vector search results, silent context truncation, and ",[240,2754,2629],{},[240,2756,670],{}," on the LLM response. The fix is span-level instrumentation at each stage with attributes that expose these states directly.",[217,2759,2760,2761,2763,2764,1099,2766,2768],{},"For LlamaIndex and LangChain, a single instrumentation call handles the most common spans automatically — note that auto-instrumentation libraries emit their own attribute schemas (OpenInference for LlamaIndex, OpenLLMetry for LangChain), which differ from the custom ",[240,2762,327],{}," attributes used in manual instrumentation. For custom pipelines, manual spans with ",[240,2765,474],{},[240,2767,576],{}," cover the two most critical failure modes. Export spans to Uptrace or any OTLP-compatible backend and filter on these attributes to find failures immediately rather than through user complaints.",[217,2770,2771,2772,2774],{},"The OTel GenAI SIG is working toward official RAG semantic conventions. 
Until that specification stabilizes, the custom ",[240,2773,327],{}," attributes used in manual instrumentation and the OpenInference attributes emitted by auto-instrumentation are both supported by Uptrace's attribute indexing.",[2776,2777,2778],"style",{},"html pre.shiki code .s7eDp, html code.shiki .s7eDp{--shiki-default:#6F42C1}html pre.shiki code .sYBdl, html code.shiki .sYBdl{--shiki-default:#032F62}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .sD7c4, html code.shiki .sD7c4{--shiki-default:#D73A49}html pre.shiki code .sgsFI, html code.shiki .sgsFI{--shiki-default:#24292E}html pre.shiki code .sqxcx, html code.shiki .sqxcx{--shiki-default:#E36209}html pre.shiki code .sAwPA, html code.shiki .sAwPA{--shiki-default:#6A737D}html pre.shiki code .sYu0t, html code.shiki .sYu0t{--shiki-default:#005CC5}",{"title":717,"searchDepth":785,"depth":758,"links":2780},[2781,2782,2789,2790,2791,2792,2793,2794,2795],{"id":247,"depth":758,"text":248},{"id":296,"depth":758,"text":297,"children":2783},[2784,2785,2786,2787,2788],{"id":336,"depth":772,"text":337},{"id":408,"depth":772,"text":409},{"id":491,"depth":772,"text":492},{"id":539,"depth":772,"text":540},{"id":586,"depth":772,"text":587},{"id":681,"depth":758,"text":682},{"id":1106,"depth":758,"text":1107},{"id":1508,"depth":758,"text":1509},{"id":2422,"depth":758,"text":2423},{"id":2545,"depth":758,"text":2546},{"id":2638,"depth":758,"text":2639},{"id":2748,"depth":758,"text":2749},"2026-04-16","How to trace every stage of a RAG pipeline with OpenTelemetry — embedding, vector search, reranking, context assembly, and LLM call — using LlamaIndex, LangChain, and Uptrace.","md","\u002Fguides\u002Fopentelemetry-rag-observability\u002Fcover.webp",{"readingTime":2801},{"text":2802,"minutes":2803,"time":2804,"words":2805},"10 min read",9.32,559200,1864,{"title":161,"description":2797},"jY5dqMGok2VABa3VAXUfRM0CZf8l3REHyyxE1zp50yg",[2809,2811],{"title":157,"path":158,"stem":159,"description":2810,"children":-1},"Monitor your RabbitMQ cluster performance for free using Uptrace and OpenTelemetry Collector receiver. Track messages, queues, and broker health.",{"title":165,"path":166,"stem":167,"description":2812,"children":-1},"Rails monitoring guide using OpenTelemetry. Monitor Rails app performance, ActiveRecord queries, and server metrics with distributed tracing and APM capabilities.",1778588614444]