OpenTelemetry Sampling [Ruby]

What is sampling?

Sampling is a process that limits the number of traces generated by a system. In high-volume applications, collecting 100% of traces can be expensive and unnecessary. Sampling lets you collect a representative subset of traces while reducing cost and performance overhead.

Ruby sampling

OpenTelemetry Ruby SDK provides head-based sampling capabilities where the sampling decision is made at the beginning of a trace. By default, the tracer provider uses a ParentBased sampler with the AlwaysOnSampler. A sampler can be set on the tracer provider when creating it.

Built-in samplers

AlwaysOnSampler

Samples every trace. Useful in development environments, but use it with caution in production systems that handle significant traffic:

ruby
require 'opentelemetry/sdk'

# Configure the SDK first; the sampler is a property of the tracer provider,
# so it is assigned once the provider has been created.
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end

# ALWAYS_ON is the SDK's built-in sampler that records and samples every span.
OpenTelemetry.tracer_provider.sampler = OpenTelemetry::SDK::Trace::Samplers::ALWAYS_ON

AlwaysOffSampler

Samples no traces. Useful for completely disabling tracing:

ruby
# Configure the SDK first; the sampler is a property of the tracer provider,
# so it is assigned once the provider has been created.
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end

# ALWAYS_OFF is the SDK's built-in sampler that drops every span.
OpenTelemetry.tracer_provider.sampler = OpenTelemetry::SDK::Trace::Samplers::ALWAYS_OFF

TraceIdRatioBasedSampler

Samples a fraction of traces, deciding deterministically from the trace ID. The fraction must be between 0.0 and 1.0:

ruby
# Sample 10% of traces
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end
# The sampler is assigned on the tracer provider after configuration.
OpenTelemetry.tracer_provider.sampler =
  OpenTelemetry::SDK::Trace::Samplers.trace_id_ratio_based(0.1)

# Sample 50% of traces
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end
OpenTelemetry.tracer_provider.sampler =
  OpenTelemetry::SDK::Trace::Samplers.trace_id_ratio_based(0.5)

ParentBasedSampler

A sampler decorator that behaves differently based on the parent of the span. If the span has no parent, the decorated sampler is used to make the sampling decision:

ruby
# ParentBased with TraceIdRatioBased root sampler
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end
# `parent_based` wraps the root sampler; the result is assigned on the
# tracer provider after configuration.
OpenTelemetry.tracer_provider.sampler = OpenTelemetry::SDK::Trace::Samplers.parent_based(
  root: OpenTelemetry::SDK::Trace::Samplers.trace_id_ratio_based(0.1)
)

# ParentBased with AlwaysOnSampler root sampler (default behavior)
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end
OpenTelemetry.tracer_provider.sampler = OpenTelemetry::SDK::Trace::Samplers.parent_based(
  root: OpenTelemetry::SDK::Trace::Samplers::ALWAYS_ON
)

Configuration in Ruby

Environment variables

You can configure sampling using environment variables:

bash
# Ratio-based sampler keeping 50% of traces
export OTEL_TRACES_SAMPLER='traceidratio'
export OTEL_TRACES_SAMPLER_ARG='0.5'

# Parent-based sampler whose root uses a 10% trace ID ratio
export OTEL_TRACES_SAMPLER='parentbased_traceidratio'
export OTEL_TRACES_SAMPLER_ARG='0.1'

# Sample everything
export OTEL_TRACES_SAMPLER='always_on'

# Sample nothing
export OTEL_TRACES_SAMPLER='always_off'

Programmatic config

ruby
require 'opentelemetry/sdk'
require 'opentelemetry/exporter/otlp'

# Wires up OpenTelemetry for the application: an OTLP exporter behind a
# batch span processor, a service resource, and a sampler chosen from the
# APP_ENV environment variable.
class TracingSetup
  # Configures the global SDK and installs the environment-specific sampler.
  #
  # Reads UPTRACE_DSN (exporter header) and APP_ENV (defaults to
  # 'development') from the process environment.
  def self.configure
    # Create OTLP exporter
    exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(
      endpoint: 'https://api.uptrace.dev/v1/traces',
      headers: { 'uptrace-dsn' => ENV['UPTRACE_DSN'] }
    )

    # Create resource
    resource = OpenTelemetry::SDK::Resources::Resource.create({
      'service.name' => 'my-service',
      'service.version' => '1.0.0'
    })

    OpenTelemetry::SDK.configure do |c|
      c.resource = resource

      c.add_span_processor(
        OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(exporter)
      )
    end

    # The sampler belongs to the tracer provider, so it is assigned after
    # the SDK has built the provider.
    OpenTelemetry.tracer_provider.sampler =
      sampler_for_environment(ENV.fetch('APP_ENV', 'development'))
  end

  # Picks the sampler for an environment name.
  #
  # @param env [String] environment name
  # @return [Object] ALWAYS_ON for 'development', ALWAYS_OFF for 'testing',
  #   a parent-based 10% ratio sampler for 'production', and a parent-based
  #   25% ratio sampler for anything else
  def self.sampler_for_environment(env)
    samplers = OpenTelemetry::SDK::Trace::Samplers

    case env
    when 'development'
      samplers::ALWAYS_ON
    when 'production'
      samplers.parent_based(root: samplers.trace_id_ratio_based(0.1)) # 10% sampling
    when 'testing'
      samplers::ALWAYS_OFF
    else
      samplers.parent_based(root: samplers.trace_id_ratio_based(0.25)) # 25% sampling
    end
  end
  # A bare `private` does not apply to `def self.` methods; this does.
  private_class_method :sampler_for_environment
end

# Usage: run the setup, then obtain a tracer named 'my_app' (version '1.0.0')
# from the global tracer provider.
TracingSetup.configure
tracer = OpenTelemetry.tracer_provider.tracer('my_app', '1.0.0')

Rails-specific config

ruby
# config/initializers/opentelemetry.rb
require 'opentelemetry/sdk'
require 'opentelemetry/instrumentation/all'

# Configures OpenTelemetry for a Rails application: service identity,
# resource attributes, all available instrumentation, and a sampler chosen
# from Rails.env.
class RailsTracingConfig
  # Configures the global SDK, then installs the environment-based sampler
  # on the tracer provider.
  def self.setup
    OpenTelemetry::SDK.configure do |c|
      c.service_name = Rails.application.class.module_parent_name.downcase
      c.service_version = ENV['APP_VERSION'] || '1.0.0'

      c.resource = OpenTelemetry::SDK::Resources::Resource.create({
        'deployment.environment' => Rails.env,
        'framework.name' => 'rails',
        'framework.version' => Rails::VERSION::STRING
      })

      c.use_all # enables all instrumentation
    end

    # The sampler belongs to the tracer provider, so it is assigned after
    # the SDK has built the provider.
    OpenTelemetry.tracer_provider.sampler = sampler_for(Rails.env)
  end

  # Environment-based sampling.
  #
  # @param env [String] Rails environment name
  # @return [Object] ALWAYS_ON in development, ALWAYS_OFF in test, a
  #   parent-based 5% ratio sampler in production, 10% otherwise
  def self.sampler_for(env)
    samplers = OpenTelemetry::SDK::Trace::Samplers

    case env
    when 'development'
      samplers::ALWAYS_ON
    when 'test'
      samplers::ALWAYS_OFF
    when 'production'
      samplers.parent_based(root: samplers.trace_id_ratio_based(0.05)) # 5% in production
    else
      samplers.parent_based(root: samplers.trace_id_ratio_based(0.1))
    end
  end
  private_class_method :sampler_for
end

# Run the setup when this initializer file is loaded.
RailsTracingConfig.setup

Custom sampler

You can create custom sampling logic by implementing the sampler interface: a `should_sample?` method that returns a sampling result, and a `description` method:

ruby
# Sampler that applies one rate to spans carrying the attribute
# 'priority' => 'high' and another rate to everything else. The decision is
# derived from the trace ID, so it is deterministic for a given trace and rate.
class CustomSampler
  # Largest value an unsigned 64-bit read of the trace ID can produce. The
  # sampling bound must span this whole range, otherwise a rate of 1.0 would
  # only keep about half of all traces.
  MAX_UINT64 = 2**64 - 1

  # @param high_priority_rate [Float] rate (0.0..1.0) for high-priority spans
  # @param default_rate [Float] rate (0.0..1.0) for all other spans
  def initialize(high_priority_rate: 1.0, default_rate: 0.1)
    @high_priority_rate = high_priority_rate
    @default_rate = default_rate
  end

  # @param trace_id [String] binary trace ID; its first 8 bytes drive the decision
  # @param parent_context parent context, used only to carry over its tracestate
  # @param attributes [Hash, nil] initial span attributes
  # @return [OpenTelemetry::SDK::Trace::Samplers::Result] RECORD_AND_SAMPLE or DROP
  def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
    # Sample high-priority operations at higher rate
    high_priority = attributes && attributes['priority'] == 'high'
    rate = high_priority ? @high_priority_rate : @default_rate

    decisions = OpenTelemetry::SDK::Trace::Samplers::Decision
    decision = sample?(trace_id, rate) ? decisions::RECORD_AND_SAMPLE : decisions::DROP

    OpenTelemetry::SDK::Trace::Samplers::Result.new(
      decision: decision,
      # Pass the parent's tracestate through unchanged.
      tracestate: OpenTelemetry::Trace.current_span(parent_context).context.tracestate
    )
  end

  def description
    "CustomSampler{high_priority_rate=#{@high_priority_rate}, default_rate=#{@default_rate}}"
  end

  private

  # True when the first 8 bytes of the trace ID, read as a big-endian
  # unsigned 64-bit integer, fall below the bound implied by the rate.
  def sample?(trace_id, rate)
    return true if rate >= 1.0

    trace_id.unpack1('Q>') < (rate * MAX_UINT64).to_i
  end
end

# Use custom sampler
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end

# The sampler is assigned on the tracer provider after configuration.
OpenTelemetry.tracer_provider.sampler =
  CustomSampler.new(high_priority_rate: 1.0, default_rate: 0.1)

Sampler with caching

ruby
# Fixed-window rate limiter: accepts at most `max_traces_per_second`
# sampling requests per one-second window and drops the rest. State is
# guarded by a mutex so the sampler can be shared between threads.
class CachedRateLimitSampler
  # Length of one rate-limit window, in seconds.
  WINDOW_SECONDS = 1.0

  # @param max_traces_per_second [Integer] accepted decisions per window
  def initialize(max_traces_per_second: 100)
    @max_traces_per_second = max_traces_per_second
    @trace_count = 0
    @last_reset = monotonic_now
    @mutex = Mutex.new
  end

  # @param parent_context parent context, used only to carry over its tracestate
  # @return [OpenTelemetry::SDK::Trace::Samplers::Result] RECORD_AND_SAMPLE
  #   while the current window has capacity, DROP otherwise
  def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
    decisions = OpenTelemetry::SDK::Trace::Samplers::Decision
    accepted = @mutex.synchronize { take_slot? }

    OpenTelemetry::SDK::Trace::Samplers::Result.new(
      decision: accepted ? decisions::RECORD_AND_SAMPLE : decisions::DROP,
      # Pass the parent's tracestate through unchanged.
      tracestate: OpenTelemetry::Trace.current_span(parent_context).context.tracestate
    )
  end

  def description
    "CachedRateLimitSampler{max_traces_per_second=#{@max_traces_per_second}}"
  end

  private

  # Resets the window when it has expired, then claims a slot if one is
  # left. Must be called while holding @mutex.
  def take_slot?
    now = monotonic_now

    # Reset counter every second
    if now - @last_reset >= WINDOW_SECONDS
      @trace_count = 0
      @last_reset = now
    end

    return false unless @trace_count < @max_traces_per_second

    @trace_count += 1
    true
  end

  # Monotonic clock reading, so wall-clock adjustments cannot stretch or
  # shrink a window.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end

# Usage
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'high-traffic-service'
end

# The limiter counts every sampling request it receives, so it is wrapped in
# a parent-based sampler: only root spans consume rate-limit slots and child
# spans follow their parent's decision.
OpenTelemetry.tracer_provider.sampler = OpenTelemetry::SDK::Trace::Samplers.parent_based(
  root: CachedRateLimitSampler.new(max_traces_per_second: 50)
)

Debugging sampling

Check sampling

ruby
# Starts a span with the given name and prints its sampling state.
#
# A span dropped by the sampler still has a valid context (its sampled flag
# is simply off), so it is reported through the "Sampled:" line. An invalid
# context means no real span was created at all.
#
# @param span_name [String] name of the span to start
def log_sampling_decision(span_name)
  tracer = OpenTelemetry.tracer_provider.tracer('debug_sampler', '1.0.0')

  tracer.in_span(span_name) do |span|
    context = span.context

    unless context.valid?
      puts "Span '#{span_name}' - Invalid context (is the SDK configured?)"
      next
    end

    puts "Span '#{span_name}' - Trace ID: #{context.hex_trace_id}"
    puts "Sampled: #{context.trace_flags.sampled?}"
    puts "Recording: #{span.recording?}"
  end
end

# Exercise the sampler: start ten spans named test-span-0 .. test-span-9
(0...10).each { |index| log_sampling_decision("test-span-#{index}") }

Monitor sampling rates

ruby
# Thread-safe counter of observed spans and how many of them were sampled.
class SamplingMonitor
  def initialize
    @total_spans = 0
    @sampled_spans = 0
    @start_time = monotonic_now
    @mutex = Mutex.new
  end

  # Counts a span; it is counted as sampled when its context is valid and
  # its trace flags have the sampled bit set.
  #
  # @param span [#context] span to record
  def record_span(span)
    context = span.context
    sampled = context.valid? && context.trace_flags.sampled?

    @mutex.synchronize do
      @total_spans += 1
      @sampled_spans += 1 if sampled
    end
  end

  # @return [Float] fraction of recorded spans that were sampled; 0.0 when
  #   nothing has been recorded yet
  def sampling_rate
    @mutex.synchronize { current_rate }
  end

  # @return [Hash] snapshot with :total_spans, :sampled_spans,
  #   :sampling_rate, :elapsed_seconds and :spans_per_second
  def stats
    @mutex.synchronize do
      elapsed = monotonic_now - @start_time
      {
        total_spans: @total_spans,
        sampled_spans: @sampled_spans,
        # Ruby's Mutex is not reentrant: calling #sampling_rate here would
        # raise ThreadError, so the lock-free helper is used instead.
        sampling_rate: current_rate,
        elapsed_seconds: elapsed,
        spans_per_second: elapsed > 0 ? @total_spans / elapsed : 0
      }
    end
  end

  private

  # Sampled fraction; must be called while holding @mutex.
  def current_rate
    return 0.0 if @total_spans.zero?

    @sampled_spans.to_f / @total_spans
  end

  # Monotonic clock reading used to measure elapsed time.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end

# Usage: feed 100 short spans through the monitor, then print its snapshot
monitor = SamplingMonitor.new
tracer = OpenTelemetry.tracer_provider.tracer('monitored_service', '1.0.0')

(0...100).each do |index|
  span_name = "test-span-#{index}"

  tracer.in_span(span_name) do |span|
    monitor.record_span(span)
    sleep(0.01)
  end
end

puts monitor.stats

Sampling middleware

ruby
# Rack middleware that logs the sampling state of the current span for each
# request and periodically logs aggregate sampling statistics.
class SamplingDebugMiddleware
  # Aggregate stats are logged each time the span count reaches a multiple
  # of this value.
  STATS_LOG_INTERVAL = 100

  # @param app [#call] the next Rack application in the chain
  def initialize(app)
    @app = app
    @monitor = SamplingMonitor.new
  end

  # @param env [Hash] Rack environment
  # @return [Array] the downstream application's response, unchanged
  def call(env)
    recorded = record_sampling_info(env)
    response = @app.call(env)

    # Only look at the stats when this request added a span; otherwise a
    # count of 0 (0 % 100 == 0), or a count parked on a multiple of the
    # interval, would log on every single request.
    log_stats if recorded

    response
  end

  private

  # Records the current span and logs its sampling info for debugging.
  #
  # @return [Boolean] true when a valid current span was recorded
  def record_sampling_info(env)
    current_span = OpenTelemetry::Trace.current_span
    return false unless current_span&.context&.valid?

    @monitor.record_span(current_span)

    Rails.logger.debug({
      message: 'Request sampling info',
      path: Rack::Request.new(env).path,
      trace_id: current_span.context.hex_trace_id,
      sampled: current_span.context.trace_flags.sampled?,
      recording: current_span.recording?
    })
    true
  end

  # Log stats periodically
  def log_stats
    stats = @monitor.stats
    total = stats[:total_spans]
    return unless total.positive? && (total % STATS_LOG_INTERVAL).zero?

    Rails.logger.info("Sampling stats: #{stats}")
  end
end

# config/environments/development.rb
# Register the debug middleware for the development environment.
Rails.application.configure { config.middleware.use(SamplingDebugMiddleware) }

Production considerations

Sampling in microservices

In a microservices architecture, sampling decisions should be made at the root of the trace and propagated to all services. This ensures consistent sampling across the entire distributed trace.

ruby
# Use ParentBased sampler in all services
# Shared tracing setup for every service in a distributed system.
class MicroserviceTracingConfig
  # Fraction of root traces that are sampled.
  ROOT_SAMPLING_RATIO = 0.1

  # Configures the SDK for the named service and installs a parent-based
  # sampler on the tracer provider.
  #
  # @param service_name [String] value for the service name
  def self.setup_for_service(service_name)
    OpenTelemetry::SDK.configure do |c|
      c.service_name = service_name
      c.use_all
    end

    samplers = OpenTelemetry::SDK::Trace::Samplers

    # Root services (entry points) use TraceIdRatioBased
    # Downstream services respect parent sampling decision
    OpenTelemetry.tracer_provider.sampler = samplers.parent_based(
      root: samplers.trace_id_ratio_based(ROOT_SAMPLING_RATIO)
    )
  end
end

# The three calls below illustrate the same setup being used by three
# different services; each passes its own service name.

# Gateway service (entry point)
MicroserviceTracingConfig.setup_for_service('api-gateway')

# User service (downstream)
MicroserviceTracingConfig.setup_for_service('user-service')

# Order service (downstream)
MicroserviceTracingConfig.setup_for_service('order-service')

Performance impact

Sampling reduces the performance overhead of tracing:

  • CPU usage: Fewer spans to process and export
  • Memory usage: Smaller trace buffers
  • Network usage: Less data sent to backend
  • Storage costs: Reduced storage requirements

Memory-aware sampling

ruby
# Ratio sampler that halves its sampling rate while the process's memory
# usage is above a threshold. The decision is derived from the trace ID, so
# it is deterministic for a given trace and rate.
class MemoryAwareSampler
  # Largest value an unsigned 64-bit read of the trace ID can produce. The
  # sampling bound must span this whole range, otherwise a rate of 1.0 would
  # only keep about half of all traces.
  MAX_UINT64 = 2**64 - 1

  # @param base_rate [Float] sampling rate (0.0..1.0) under normal memory use
  # @param memory_threshold_mb [Integer] usage above which the rate is halved
  def initialize(base_rate: 0.1, memory_threshold_mb: 500)
    @base_rate = base_rate
    @memory_threshold_mb = memory_threshold_mb
  end

  # @param trace_id [String] binary trace ID; its first 8 bytes drive the decision
  # @param parent_context parent context, used only to carry over its tracestate
  # @return [OpenTelemetry::SDK::Trace::Samplers::Result] RECORD_AND_SAMPLE or DROP
  def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
    # Adjust sampling rate based on memory usage
    memory_high = get_memory_usage_mb > @memory_threshold_mb
    rate = memory_high ? @base_rate * 0.5 : @base_rate

    decisions = OpenTelemetry::SDK::Trace::Samplers::Decision
    decision = sample?(trace_id, rate) ? decisions::RECORD_AND_SAMPLE : decisions::DROP

    OpenTelemetry::SDK::Trace::Samplers::Result.new(
      decision: decision,
      # Pass the parent's tracestate through unchanged.
      tracestate: OpenTelemetry::Trace.current_span(parent_context).context.tracestate
    )
  end

  def description
    "MemoryAwareSampler{base_rate=#{@base_rate}, threshold=#{@memory_threshold_mb}MB}"
  end

  private

  # True when the first 8 bytes of the trace ID, read as a big-endian
  # unsigned 64-bit integer, fall below the bound implied by the rate.
  def sample?(trace_id, rate)
    return true if rate >= 1.0

    trace_id.unpack1('Q>') < (rate * MAX_UINT64).to_i
  end

  # Memory usage of this process in whole megabytes: the VmRSS value from
  # /proc/<pid>/status when that file exists, otherwise an approximation from
  # the GC's allocated heap pages. Returns 0 when neither can be determined.
  def get_memory_usage_mb
    status_path = "/proc/#{Process.pid}/status"

    if File.exist?(status_path)
      rss_kb = File.read(status_path)[/VmRSS:\s+(\d+)\s+kB/, 1]
      return rss_kb.to_i / 1024 if rss_kb # Convert KB to MB
    end

    # Fallback: use GC stats if available
    return 0 unless GC.respond_to?(:stat)

    heap_pages = GC.stat[:heap_allocated_pages] || 0
    heap_pages * 16_384 / 1024 / 1024 # Approximate MB
  rescue StandardError
    0 # Return 0 if unable to determine memory usage
  end
end

# Usage
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'memory-sensitive-service'
end

# The sampler is assigned on the tracer provider after configuration.
OpenTelemetry.tracer_provider.sampler =
  MemoryAwareSampler.new(base_rate: 0.1, memory_threshold_mb: 400)

Sampling strategies

ruby
# Maps an environment name to a sampling rate and builds the matching sampler.
class EnvironmentBasedSampling
  # Sampling rate and human-readable description per environment.
  SAMPLING_CONFIGS = {
    'production' => {
      rate: 0.01,        # 1% sampling in production
      description: 'Conservative sampling for production load'
    },
    'staging' => {
      rate: 0.1,         # 10% sampling in staging
      description: 'Moderate sampling for staging validation'
    },
    'development' => {
      rate: 1.0,         # 100% sampling in development
      description: 'Full sampling for development debugging'
    },
    'test' => {
      rate: 0.0,         # No sampling in tests
      description: 'Disabled sampling for test performance'
    }
  }.each_value(&:freeze).freeze

  # Builds the sampler for an environment.
  #
  # @param env [String] environment name; unknown names use the
  #   'development' configuration
  # @return [Object] ALWAYS_OFF for a rate of 0.0 or less, ALWAYS_ON for 1.0
  #   or more, otherwise a parent-based ratio sampler
  def self.sampler_for_environment(env)
    rate = config_for(env)[:rate]
    samplers = OpenTelemetry::SDK::Trace::Samplers

    if rate <= 0.0
      samplers::ALWAYS_OFF
    elsif rate >= 1.0
      samplers::ALWAYS_ON
    else
      samplers.parent_based(root: samplers.trace_id_ratio_based(rate))
    end
  end

  # Configures the SDK for the current Rails environment and installs the
  # matching sampler on the tracer provider.
  def self.configure_for_rails
    env = Rails.env
    # Same lookup as the sampler uses, so the logged description always
    # matches the sampler that is installed.
    config = config_for(env)

    Rails.logger.info("Configuring OpenTelemetry sampling for #{env}: #{config[:description]}")

    OpenTelemetry::SDK.configure do |c|
      c.service_name = Rails.application.class.module_parent_name.downcase
      c.use_all
    end

    OpenTelemetry.tracer_provider.sampler = sampler_for_environment(env)
  end

  # Configuration entry for an environment, falling back to 'development'.
  def self.config_for(env)
    SAMPLING_CONFIGS.fetch(env.to_s) { SAMPLING_CONFIGS['development'] }
  end
  private_class_method :config_for
end

# Usage in Rails initializer: configures OpenTelemetry for the current Rails.env
EnvironmentBasedSampling.configure_for_rails

Background job

ruby
# Sampler for background jobs that picks a sampling rate from the job's
# priority. The priority comes from the 'job.priority' span attribute when
# present, otherwise it is inferred from the span name. The decision is
# derived from the trace ID, so it is deterministic for a given trace and rate.
class BackgroundJobSampler
  # Largest value an unsigned 64-bit read of the trace ID can produce. The
  # sampling bound must span this whole range, otherwise a rate of 1.0 would
  # only keep about half of all traces.
  MAX_UINT64 = 2**64 - 1

  # @param job_sampling_rates [Hash{String => Float}] overrides merged over
  #   the defaults; keys are priority names ('high_priority', 'normal',
  #   'low_priority', or any value used in the 'job.priority' attribute)
  def initialize(job_sampling_rates = {})
    @job_sampling_rates = {
      'high_priority' => 1.0,    # Always sample high priority jobs
      'normal' => 0.1,           # 10% of normal jobs
      'low_priority' => 0.01     # 1% of low priority jobs
    }.merge(job_sampling_rates)
  end

  # @param trace_id [String] binary trace ID; its first 8 bytes drive the decision
  # @param parent_context parent context, used only to carry over its tracestate
  # @param name [String] span name, used to infer the priority
  # @param attributes [Hash, nil] initial span attributes
  # @return [OpenTelemetry::SDK::Trace::Samplers::Result] RECORD_AND_SAMPLE or DROP
  def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
    # Determine job priority from attributes or span name; priorities
    # without a configured rate use the 'normal' rate.
    priority = determine_job_priority(name, attributes)
    rate = @job_sampling_rates[priority] || @job_sampling_rates['normal']

    decisions = OpenTelemetry::SDK::Trace::Samplers::Decision
    decision = sample?(trace_id, rate) ? decisions::RECORD_AND_SAMPLE : decisions::DROP

    OpenTelemetry::SDK::Trace::Samplers::Result.new(
      decision: decision,
      # Pass the parent's tracestate through unchanged.
      tracestate: OpenTelemetry::Trace.current_span(parent_context).context.tracestate
    )
  end

  def description
    "BackgroundJobSampler{rates=#{@job_sampling_rates}}"
  end

  private

  # True when the first 8 bytes of the trace ID, read as a big-endian
  # unsigned 64-bit integer, fall below the bound implied by the rate.
  def sample?(trace_id, rate)
    return true if rate >= 1.0

    trace_id.unpack1('Q>') < (rate * MAX_UINT64).to_i
  end

  # @return [String] the 'job.priority' attribute when present, otherwise
  #   'high_priority', 'low_priority' or 'normal' inferred from the name
  def determine_job_priority(name, attributes)
    # Check attributes first
    return attributes['job.priority'] if attributes&.key?('job.priority')

    # Infer from job name
    case name
    when /urgent|critical|high/i
      'high_priority'
    when /low|batch|cleanup/i
      'low_priority'
    else
      'normal'
    end
  end
end

# Configure for Sidekiq
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'background-workers'
  c.use_all
end

# Override keys must be the priority names the sampler resolves
# ('high_priority', 'normal', 'low_priority'). Keys such as 'urgent' or
# 'cleanup' would only ever match a literal 'job.priority' attribute value,
# never a job name.
OpenTelemetry.tracer_provider.sampler = BackgroundJobSampler.new({
  'high_priority' => 1.0,
  'normal' => 0.05,
  'low_priority' => 0.001
})

What's next?