OpenTelemetry Sampling [Erlang/Elixir]

What is sampling?

Sampling is a process that restricts the number of traces generated by a system. In high-volume applications, collecting 100% of traces can be expensive and unnecessary. Sampling allows you to collect a representative subset of traces while reducing costs and performance overhead.

Erlang/Elixir sampling

The OpenTelemetry Erlang/Elixir SDK provides head-based sampling, where the sampling decision is made at the beginning of a trace. By default, all spans are sampled (100% sampling rate). The SDK offers several built-in samplers that can be configured through application configuration or environment variables.

Built-in samplers

ParentBasedSampler

This is the most commonly used sampler for head-based sampling. It reuses the sampling decision of the span's parent, and falls back to a root sampler when there is no parent:

elixir Elixir
# config/runtime.exs
# Spans with a parent inherit the parent's decision; only spans without a
# parent are decided by the `root` sampler.
config :opentelemetry,
  sampler: {:parent_based, %{
    # No parent: keep 10% of traces, chosen by trace ID
    root: {:trace_id_ratio_based, 0.10},
    # Parent present: a sampled parent keeps the span, an unsampled parent drops it
    remote_parent_sampled: :always_on,
    remote_parent_not_sampled: :always_off,
    local_parent_sampled: :always_on,
    local_parent_not_sampled: :always_off
  }}
erlang Erlang
%% config/sys.config.src
%% Spans with a parent inherit the parent's decision; only spans without a
%% parent are decided by the `root` sampler.
{opentelemetry, [
  {sampler, {parent_based, #{
    %% No parent: keep 10% of traces, chosen by trace ID
    root => {trace_id_ratio_based, 0.10},
    %% Parent present: a sampled parent keeps the span, an unsampled parent drops it
    remote_parent_sampled => always_on,
    remote_parent_not_sampled => always_off,
    local_parent_sampled => always_on,
    local_parent_not_sampled => always_off
  }}}
]}

TraceIdRatioBasedSampler

Deterministically samples a percentage of traces based on the trace ID:

elixir Elixir
# config/runtime.exs
# The decision is derived from the trace ID, so it is deterministic per trace.
config :opentelemetry,
  sampler: {:trace_id_ratio_based, 0.25}  # Sample 25% of traces
erlang Erlang
%% config/sys.config.src
%% Sample 25% of traces; the decision is derived from the trace ID.
{opentelemetry, [
  {sampler, {trace_id_ratio_based, 0.25}}
]}

AlwaysOnSampler

Samples 100% of traces. This is the default behavior:

elixir Elixir
# config/runtime.exs
# Sample every trace (100%).
config :opentelemetry,
  sampler: :always_on
erlang Erlang
%% config/sys.config.src
%% Sample every trace (100%).
{opentelemetry, [
  {sampler, always_on}
]}

AlwaysOffSampler

Disables tracing completely (0% sampling):

elixir Elixir
# config/runtime.exs
# Sample nothing (0%): no trace is kept.
config :opentelemetry,
  sampler: :always_off
erlang Erlang
%% config/sys.config.src
%% Sample nothing (0%): no trace is kept.
{opentelemetry, [
  {sampler, always_off}
]}

Configuration in Erlang/Elixir

Environment variable

You can configure sampling using environment variables instead of application configuration:

bash
# Each group below is a self-contained alternative. They all set the same
# variables, so export only ONE group; a later export overrides an earlier one.

# ParentBased with TraceIdRatio - 10% sampling
export OTEL_TRACES_SAMPLER="parentbased_traceidratio"
export OTEL_TRACES_SAMPLER_ARG="0.1"

# ParentBased with AlwaysOn
export OTEL_TRACES_SAMPLER="parentbased_always_on"

# Simple TraceIdRatio - 5% sampling
export OTEL_TRACES_SAMPLER="traceidratio"
export OTEL_TRACES_SAMPLER_ARG="0.05"

# AlwaysOn/AlwaysOff
export OTEL_TRACES_SAMPLER="always_on"
# To disable sampling entirely, use this instead:
# export OTEL_TRACES_SAMPLER="always_off"

Programmatic configuration

elixir
# config/runtime.exs
import Config

# Parent-based sampling with 5% for root spans. Spans that have a parent
# follow the parent's decision, so a trace is either kept or dropped as a whole.
parent_based_sampler =
  {:parent_based,
   %{
     root: {:trace_id_ratio_based, 0.05},
     remote_parent_sampled: :always_on,
     remote_parent_not_sampled: :always_off,
     local_parent_sampled: :always_on,
     local_parent_not_sampled: :always_off
   }}

# Different sampling for different environments
sampling_config =
  case config_env() do
    # Sample everything in development
    :dev ->
      :always_on

    # Drop every span in tests
    :test ->
      :always_off

    # :prod and any custom environment (for example :staging) use the
    # production sampler. Without this catch-all, booting with an unlisted
    # environment would raise CaseClauseError while the config is evaluated.
    _env ->
      parent_based_sampler
  end

config :opentelemetry,
  sampler: sampling_config,
  span_processor: :batch,
  traces_exporter: :otlp

Custom sampler

Create custom sampling logic by implementing the :otel_sampler behaviour:

elixir Elixir
defmodule MyApp.CustomSampler do
  @moduledoc """
  Example sampler implementing the `:otel_sampler` behaviour.

  A span is recorded and sampled when any of these holds:

    * its start attributes contain `"error" => true`;
    * its name contains one of the critical keywords;
    * a random draw falls below the default ratio (10%).

  Otherwise the span is dropped.
  """

  @behaviour :otel_sampler

  require OpenTelemetry.Tracer, as: Tracer

  # Span names containing any of these substrings are always sampled.
  @critical_keywords ["critical", "payment", "auth"]

  # Share of the remaining (non-error, non-critical) spans that is sampled.
  @default_ratio 0.1

  @doc "Builds the sampler config. The options are ignored by this sampler."
  @impl :otel_sampler
  def setup(_sampler_opts), do: []

  @doc "Returns the human-readable name of this sampler."
  @impl :otel_sampler
  def description(_sampler_config), do: "MyApp.CustomSampler"

  @doc """
  Decides whether the span being started is sampled.

  Returns `{decision, attributes, tracestate}` where `decision` is
  `:record_and_sample` or `:drop`, no extra attributes are added (`[]`), and
  the tracestate is the one found on the current span of `ctx`.
  """
  @impl :otel_sampler
  def should_sample(ctx, _trace_id, _links, span_name, _span_kind, attributes, _sampler_config) do
    tracestate = ctx |> Tracer.current_span_ctx() |> OpenTelemetry.Span.tracestate()

    decision = if sample?(span_name, attributes), do: :record_and_sample, else: :drop

    {decision, [], tracestate}
  end

  # Rules are checked in order; `or` short-circuits, so the random draw only
  # happens for spans that are neither errors nor critical operations.
  defp sample?(span_name, attributes) do
    has_error_attribute?(attributes) or critical_operation?(span_name) or
      :rand.uniform() < @default_ratio
  end

  # Span names may be atoms as well as binaries; normalise before matching so
  # an atom name does not raise inside String.contains?/2.
  defp critical_operation?(span_name) do
    span_name |> to_string() |> String.contains?(@critical_keywords)
  end

  # Works for both map and keyword/tuple-list attributes, since both
  # enumerate as {key, value} pairs.
  defp has_error_attribute?(attributes) do
    Enum.any?(attributes, &match?({"error", true}, &1))
  end
end

# Configuration
# The custom sampler is installed as the `root` sampler of :parent_based, so it
# only decides for spans without a parent; child spans follow their parent.
# The map next to the module name is the sampler's options; the example
# MyApp.CustomSampler.setup/1 ignores them.
config :opentelemetry,
  sampler: {:parent_based, %{root: {MyApp.CustomSampler, %{custom_arg: "value"}}}},
  span_processor: :batch,
  traces_exporter: :otlp
erlang Erlang
-module(my_custom_sampler).
-behaviour(otel_sampler).
-export([setup/1, description/1, should_sample/7]).

%% Span names containing any of these substrings are always sampled.
-define(CRITICAL_KEYWORDS, [<<"critical">>, <<"payment">>, <<"auth">>]).

%% Builds the sampler config. The options are ignored by this sampler.
setup(_SamplerOpts) -> [].

%% Human-readable sampler name. Returned as a binary, like the built-in
%% samplers do, rather than as a charlist.
description(_SamplerConfig) -> <<"MyCustomSampler">>.

%% Decides whether the span being started is sampled.
%% Returns {Decision, ExtraAttributes, TraceState} where Decision is
%% record_and_sample or drop and no extra attributes are added.
should_sample(Ctx, _TraceId, _Links, SpanName, _SpanKind, Attributes, _SamplerConfig) ->
    TraceState = otel_span:tracestate(otel_tracer:current_span_ctx(Ctx)),

    %% orelse short-circuits, so the random draw only happens for spans that
    %% are neither errors nor critical operations.
    SampleDecision = has_error_attribute(Attributes)      % Always sample errors
        orelse is_critical_operation(SpanName)            % Always sample critical operations
        orelse rand:uniform() < 0.1,                      % 10% of regular traces

    case SampleDecision of
        true -> {record_and_sample, [], TraceState};
        false -> {drop, [], TraceState}
    end.

%% Start attributes can reach the sampler as a map or as a list of
%% {Key, Value} pairs; lists:any/2 on a map would crash, so handle both.
has_error_attribute(Attributes) when is_map(Attributes) ->
    maps:get(<<"error">>, Attributes, false) =:= true;
has_error_attribute(Attributes) when is_list(Attributes) ->
    lists:member({<<"error">>, true}, Attributes).

%% Span names may be atoms as well as binaries; binary:match/2 only accepts
%% binaries, so atoms are converted first.
is_critical_operation(SpanName) when is_atom(SpanName) ->
    is_critical_operation(atom_to_binary(SpanName, utf8));
is_critical_operation(SpanName) when is_binary(SpanName) ->
    binary:match(SpanName, ?CRITICAL_KEYWORDS) =/= nomatch.

% Configuration in sys.config:
% {opentelemetry, [
%   {sampler, {parent_based, #{root => {my_custom_sampler, #{custom_arg => value}}}}}
% ]}

Advanced scenarios

Sampling based on span name

elixir Elixir
defmodule MyApp.ConditionalSampler do
  @moduledoc """
  Example sampler that picks a sampling rate from the span name.

  Rules are matched on the name prefix, in this order:

    * `/admin...` - always sampled;
    * `/api...` - 50% sampled;
    * `/health` (exact) - never sampled;
    * anything else, including non-binary span names - 10% sampled.

  Prefix matching mirrors the Erlang `conditional_sampler` example, so both
  implementations make the same decision for the same span name.
  """

  @behaviour :otel_sampler

  @doc "Builds the sampler config. The options are ignored by this sampler."
  @impl :otel_sampler
  def setup(_opts), do: []

  @doc "Returns the human-readable name of this sampler."
  @impl :otel_sampler
  def description(_config), do: "ConditionalSampler"

  @doc """
  Decides whether the span being started is sampled.

  Returns `{decision, attributes, tracestate}` where `decision` is
  `:record_and_sample` or `:drop`, no extra attributes are added (`[]`), and
  the tracestate is the one found on the current span of `ctx`.
  """
  @impl :otel_sampler
  def should_sample(ctx, _trace_id, _links, span_name, _span_kind, _attributes, _config) do
    tracestate =
      ctx
      |> OpenTelemetry.Tracer.current_span_ctx()
      |> OpenTelemetry.Span.tracestate()

    decision = if sample?(span_name), do: :record_and_sample, else: :drop

    {decision, [], tracestate}
  end

  # Binary prefix patterns only match binaries, so an atom span name falls
  # through to the default clause instead of raising.

  # Always sample requests to admin endpoints
  defp sample?("/admin" <> _rest), do: true
  # Sample 50% of API requests
  defp sample?("/api" <> _rest), do: :rand.uniform() < 0.5
  # Don't sample health checks
  defp sample?("/health"), do: false
  # Default sampling rate
  defp sample?(_span_name), do: :rand.uniform() < 0.1
end
erlang Erlang
-module(conditional_sampler).
-behaviour(otel_sampler).
-export([setup/1, description/1, should_sample/7]).

%% Builds the sampler config. The options are ignored by this sampler.
setup(_Opts) -> [].

%% Human-readable sampler name. Returned as a binary, like the built-in
%% samplers do, rather than as a charlist.
description(_Config) -> <<"ConditionalSampler">>.

%% Decides whether the span being started is sampled, based on its name.
%% Returns {Decision, ExtraAttributes, TraceState} where Decision is
%% record_and_sample or drop and no extra attributes are added.
should_sample(Ctx, _TraceId, _Links, SpanName, _SpanKind, _Attributes, _Config) ->
    TraceState = otel_span:tracestate(otel_tracer:current_span_ctx(Ctx)),

    Decision = case sample_span(SpanName) of
        true -> record_and_sample;
        false -> drop
    end,

    {Decision, [], TraceState}.

%% Clauses are tried top to bottom. Names that are not binaries (for example
%% atoms) match none of the prefix clauses and get the default rate.
sample_span(<<"/admin", _/binary>>) -> true;                 % Always sample admin endpoints
sample_span(<<"/api", _/binary>>) -> rand:uniform() < 0.5;   % 50% of API requests
sample_span(<<"/health">>) -> false;                         % Don't sample health checks
sample_span(_SpanName) -> rand:uniform() < 0.1.              % Default 10%

Production deployment

Phoenix application with sampling

elixir Application
# lib/my_app/application.ex
defmodule MyApp.Application do
  @moduledoc """
  Application entry point: sets up the OpenTelemetry instrumentations and then
  starts the supervision tree (Repo, PubSub, Endpoint).
  """

  use Application

  @impl true
  def start(_type, _args) do
    # Setup OpenTelemetry instrumentations. This runs before the children are
    # started, so the handlers are in place when the endpoint and repo come up.
    :opentelemetry_cowboy.setup()
    OpentelemetryPhoenix.setup(adapter: :cowboy2)
    OpentelemetryEcto.setup([:my_app, :repo])

    children = [
      MyApp.Repo,
      {Phoenix.PubSub, name: MyApp.PubSub},
      MyAppWeb.Endpoint
    ]

    # :one_for_one restarts only the child that crashed.
    opts = [strategy: :one_for_one, name: MyApp.Supervisor]
    Supervisor.start_link(children, opts)
  end
end
elixir Configuration
# config/runtime.exs
import Config

# Choose the sampler from the environment the application was built for.
#
# This is a plain expression because config files are scripts: `def`/`defp`
# cannot be used outside a module. `config_env()` is used instead of reading
# the MIX_ENV variable because MIX_ENV is normally not set when a release
# runs, which would make production fall back to the "dev" setting.
sampler =
  case config_env() do
    # Sample everything in development
    :dev -> :always_on
    # Parent-based sampling with 10% of root spans in production
    :prod -> {:parent_based, %{root: {:trace_id_ratio_based, 0.1}}}
    # Any other environment (e.g. :test): sample nothing
    _ -> :always_off
  end

config :opentelemetry,
  sampler: sampler,
  span_processor: :batch,
  traces_exporter: :otlp

config :opentelemetry_exporter,
  otlp_protocol: :grpc,
  otlp_endpoint: "https://api.uptrace.dev:4317",
  # The DSN is read from the UPTRACE_DSN environment variable at boot.
  otlp_headers: [{"uptrace-dsn", System.get_env("UPTRACE_DSN")}]

Manual span creation with sampling checks

elixir Elixir
defmodule MyApp.OrderService do
  @moduledoc """
  Example of manual span creation that checks whether the span is being
  recorded before attaching attributes and events to it.
  """

  alias OpenTelemetry.Span
  require OpenTelemetry.Tracer, as: Tracer

  @doc """
  Processes `order_data` inside a `"process_order"` span and returns the
  processing result.

  Order attributes and the `"order_processed"` event are only added when the
  span is being recorded.
  """
  def process_order(order_data) do
    Tracer.with_span "process_order" do
      span_ctx = Tracer.current_span_ctx()

      # Skip building the attribute map when the span is not recorded
      if Span.is_recording(span_ctx) do
        Tracer.set_attributes(%{
          "order.id" => order_data.id,
          "order.amount" => order_data.amount,
          "order.customer_type" => order_data.customer_type
        })
      end

      # Business logic
      outcome = perform_order_processing(order_data)

      # Only emit the event if the span is being recorded
      if Span.is_recording(span_ctx) do
        Tracer.add_event("order_processed", %{"result.status" => outcome.status})
      end

      outcome
    end
  end

  # Stand-in for real work: waits 100 ms and reports success.
  defp perform_order_processing(_order_data) do
    Process.sleep(100)
    %{status: "completed"}
  end
end
erlang Erlang
-module(order_service).
-export([process_order/1]).
-include_lib("opentelemetry_api/include/otel_tracer.hrl").

%% Processes OrderData inside a <<"process_order">> span and returns the
%% processing result. Order attributes and the <<"order_processed">> event are
%% only added when the span is being recorded.
process_order(OrderData) ->
    %% The tracer calls the fun with the context of the span it just started,
    %% so the fun must take one argument; a zero-arity fun fails with badarity.
    ?with_span(<<"process_order">>, #{}, fun(SpanCtx) ->
        % Check if span is being recorded
        case otel_span:is_recording(SpanCtx) of
            true ->
                ?set_attributes([
                    {<<"order.id">>, maps:get(id, OrderData)},
                    {<<"order.amount">>, maps:get(amount, OrderData)},
                    {<<"order.customer_type">>, maps:get(customer_type, OrderData)}
                ]);
            false ->
                ok
        end,

        % Business logic
        Result = perform_order_processing(OrderData),

        % Only emit the event if the span is being recorded
        case otel_span:is_recording(SpanCtx) of
            true ->
                ?add_event(<<"order_processed">>, [
                    {<<"result.status">>, maps:get(status, Result)}
                ]);
            false ->
                ok
        end,

        Result
    end).

%% Stand-in for real work: waits 100 ms and reports success.
perform_order_processing(_OrderData) ->
    timer:sleep(100),
    #{status => <<"completed">>}.

Monitoring sampling

Check sampling effectiveness

elixir
defmodule MyApp.SamplingChecker do
  @moduledoc """
  Compares the observed sampling rate with the rate expected for the current
  environment and logs the outcome.
  """

  require Logger

  @doc """
  Logs the current sampling rate and warns when it is more than five
  percentage points away from the expected rate.

  Nothing is logged when no spans were counted.
  """
  def check_sampling_rate do
    # Get span statistics from your monitoring system
    %{total: total, sampled: sampled} = get_span_statistics()

    if total > 0, do: report(sampled, total)
  end

  # Logs the observed rate and, if it drifts too far, a warning.
  defp report(sampled, total) do
    actual = sampled / total * 100
    shown = Float.round(actual, 2)

    Logger.info("Current sampling rate: #{shown}% (#{sampled}/#{total} spans)")

    # Alert if sampling rate is unexpected
    expected = get_expected_sampling_rate()

    if abs(actual - expected) > 5.0 do
      Logger.warning("Sampling rate #{shown}% differs from expected #{expected}%")
    end
  end

  # Placeholder figures; replace with a query against your monitoring setup.
  defp get_span_statistics do
    %{total: 1000, sampled: 100}
  end

  # Expected rate in percent, keyed by the :environment setting of :my_app.
  # Any environment not listed here is expected to sample everything.
  defp get_expected_sampling_rate do
    environment = Application.get_env(:my_app, :environment)
    Map.get(%{prod: 10.0, staging: 50.0}, environment, 100.0)
  end
end

What's next?