OpenTelemetry Sampling [Ruby]
What is sampling?
Sampling is a process that restricts the amount of traces that are generated by a system. In high-volume applications, collecting 100% of traces can be expensive and unnecessary. Sampling allows you to collect a representative subset of traces while reducing costs and performance overhead.
Ruby sampling
OpenTelemetry Ruby SDK provides head-based sampling capabilities where the sampling decision is made at the beginning of a trace. By default, the tracer provider uses a ParentBased sampler with the AlwaysOnSampler. A sampler can be set on the tracer provider when creating it.
Built-in samplers
AlwaysOnSampler
Samples every trace. Useful for development environments but be careful in production with significant traffic:
require 'opentelemetry/sdk'

# Standard SDK setup; this installs the global tracer provider.
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end

# The sampler is a property of the tracer provider, so it is assigned once the
# provider exists. ALWAYS_ON is the SDK's shared "sample everything" instance.
OpenTelemetry.tracer_provider.sampler = OpenTelemetry::SDK::Trace::Samplers::ALWAYS_ON
AlwaysOffSampler
Samples no traces. Useful for completely disabling tracing:
# Standard SDK setup; this installs the global tracer provider.
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end

# ALWAYS_OFF is the SDK's shared "drop everything" sampler instance; it is
# assigned on the tracer provider after configuration.
OpenTelemetry.tracer_provider.sampler = OpenTelemetry::SDK::Trace::Samplers::ALWAYS_OFF
TraceIdRatioBasedSampler
Samples a fraction of spans based on the trace ID. The fraction should be between 0.0 and 1.0:
# Sample 10% of traces.
# `trace_id_ratio_based` is the SDK factory for the ratio sampler; the sampler
# is assigned on the tracer provider once the SDK has been configured.
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end
OpenTelemetry.tracer_provider.sampler =
  OpenTelemetry::SDK::Trace::Samplers.trace_id_ratio_based(0.1)

# Sample 50% of traces (alternative to the snippet above; use one or the other)
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end
OpenTelemetry.tracer_provider.sampler =
  OpenTelemetry::SDK::Trace::Samplers.trace_id_ratio_based(0.5)
ParentBasedSampler
A sampler decorator that behaves differently based on the parent of the span. If the span has no parent, the decorated sampler is used to make the sampling decision:
# ParentBased with TraceIdRatioBased root sampler.
# `parent_based(root:)` is the SDK factory that wraps a root sampler; spans
# that have a parent follow the parent's decision instead of the root sampler.
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end
OpenTelemetry.tracer_provider.sampler =
  OpenTelemetry::SDK::Trace::Samplers.parent_based(
    root: OpenTelemetry::SDK::Trace::Samplers.trace_id_ratio_based(0.1)
  )

# ParentBased with AlwaysOnSampler root sampler (default behavior)
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end
OpenTelemetry.tracer_provider.sampler =
  OpenTelemetry::SDK::Trace::Samplers.parent_based(
    root: OpenTelemetry::SDK::Trace::Samplers::ALWAYS_ON
  )
Configuration in Ruby
Environment variables
You can configure sampling using environment variables:
# Pick ONE of the configurations below: each one exports OTEL_TRACES_SAMPLER,
# so a later export overrides an earlier one in the same shell.

# TraceIdRatio sampler with 50% sampling
# (OTEL_TRACES_SAMPLER_ARG carries the ratio for the ratio-based samplers)
export OTEL_TRACES_SAMPLER="traceidratio"
export OTEL_TRACES_SAMPLER_ARG="0.5"
# ParentBased with TraceIdRatio
export OTEL_TRACES_SAMPLER="parentbased_traceidratio"
export OTEL_TRACES_SAMPLER_ARG="0.1"
# Always sample
export OTEL_TRACES_SAMPLER="always_on"
# Never sample
export OTEL_TRACES_SAMPLER="always_off"
Programmatic config
require 'opentelemetry/sdk'
# Wires up tracing with an OTLP exporter and a sampler chosen from APP_ENV.
class TracingSetup
  # Root sampling ratio used in production (10%).
  PRODUCTION_RATIO = 0.1
  # Root sampling ratio used for any environment not listed explicitly (25%).
  DEFAULT_RATIO = 0.25

  class << self
    # Configures the SDK with a resource, a batch span processor exporting
    # over OTLP, and an environment-specific sampler.
    #
    # NOTE: OpenTelemetry::Exporter::OTLP must already be loaded (it is not
    # provided by `require 'opentelemetry/sdk'` alone).
    #
    # @return [Object] the sampler installed on the tracer provider
    def configure
      # Create OTLP exporter
      exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(
        endpoint: 'https://api.uptrace.dev/v1/traces',
        headers: { 'uptrace-dsn' => ENV['UPTRACE_DSN'] }
      )

      # Create resource
      resource = OpenTelemetry::SDK::Resources::Resource.create({
        'service.name' => 'my-service',
        'service.version' => '1.0.0'
      })

      OpenTelemetry::SDK.configure do |c|
        c.resource = resource
        c.add_span_processor(
          OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(exporter)
        )
      end

      # The sampler lives on the tracer provider, which exists only after
      # SDK.configure has run.
      OpenTelemetry.tracer_provider.sampler =
        sampler_for_environment(ENV.fetch('APP_ENV', 'development'))
    end

    private

    # Maps an environment name to a sampler.
    #
    # @param env [String] environment name, e.g. 'production'
    # @return [Object] a sampler from OpenTelemetry::SDK::Trace::Samplers
    def sampler_for_environment(env)
      samplers = OpenTelemetry::SDK::Trace::Samplers
      case env
      when 'development'
        samplers::ALWAYS_ON
      when 'production'
        samplers.parent_based(root: samplers.trace_id_ratio_based(PRODUCTION_RATIO))
      when 'test', 'testing' # 'test' is the conventional name; 'testing' kept for compatibility
        samplers::ALWAYS_OFF
      else
        samplers.parent_based(root: samplers.trace_id_ratio_based(DEFAULT_RATIO))
      end
    end
  end
end
# Usage
# Configure the SDK once at boot, then obtain a named, versioned tracer from
# the global tracer provider.
TracingSetup.configure
tracer = OpenTelemetry.tracer_provider.tracer('my_app', '1.0.0')
Rails-specific config
# config/initializers/opentelemetry.rb
require 'opentelemetry/sdk'
require 'opentelemetry/instrumentation/all'

# Boots OpenTelemetry for a Rails application with an environment-specific
# sampler.
class RailsTracingConfig
  # Root sampling ratio in production (5%).
  PRODUCTION_RATIO = 0.05
  # Root sampling ratio for environments not listed explicitly (10%).
  DEFAULT_RATIO = 0.1

  class << self
    # Configures the SDK (service identity, resource attributes, all
    # instrumentation) and installs the sampler for the current Rails.env.
    #
    # @return [Object] the sampler installed on the tracer provider
    def setup
      OpenTelemetry::SDK.configure do |c|
        c.service_name = Rails.application.class.module_parent_name.downcase
        c.service_version = ENV.fetch('APP_VERSION', '1.0.0')
        c.resource = OpenTelemetry::SDK::Resources::Resource.create({
          'deployment.environment' => Rails.env.to_s,
          'framework.name' => 'rails',
          'framework.version' => Rails::VERSION::STRING
        })
        c.use_all # enables all instrumentation
      end

      # The sampler lives on the tracer provider, which exists only after
      # SDK.configure has run.
      OpenTelemetry.tracer_provider.sampler = sampler_for(Rails.env)
    end

    private

    # Environment-based sampling.
    #
    # @param env [String] Rails environment name
    # @return [Object] a sampler from OpenTelemetry::SDK::Trace::Samplers
    def sampler_for(env)
      samplers = OpenTelemetry::SDK::Trace::Samplers
      case env.to_s
      when 'development'
        samplers::ALWAYS_ON
      when 'test'
        samplers::ALWAYS_OFF
      when 'production'
        samplers.parent_based(root: samplers.trace_id_ratio_based(PRODUCTION_RATIO))
      else
        samplers.parent_based(root: samplers.trace_id_ratio_based(DEFAULT_RATIO))
      end
    end
  end
end

RailsTracingConfig.setup
Custom sampler
You can create custom sampling logic by implementing the Sampler interface:
# Samples spans whose attributes carry 'priority' => 'high' at one rate and
# all other spans at another. Decisions are deterministic per trace ID.
#
# Note: the parent's sampling decision is not consulted here.
class CustomSampler
  # Number of distinct values of the 64-bit trace-ID slice used for sampling.
  TRACE_ID_RANGE = 2**64

  # @param high_priority_rate [Float] ratio (0.0..1.0) for high-priority spans
  # @param default_rate [Float] ratio (0.0..1.0) for every other span
  def initialize(high_priority_rate: 1.0, default_rate: 0.1)
    @high_priority_rate = high_priority_rate
    @default_rate = default_rate
  end

  # @param trace_id [String] binary 16-byte trace ID
  # @param attributes [Hash, nil] initial span attributes
  # @return [OpenTelemetry::SDK::Trace::Samplers::Result]
  def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
    rate = high_priority?(attributes) ? @high_priority_rate : @default_rate
    decisions = OpenTelemetry::SDK::Trace::Samplers::Decision
    decision = sample?(trace_id, rate) ? decisions::RECORD_AND_SAMPLE : decisions::DROP

    # A Result carries the tracestate to propagate; reuse the parent's.
    OpenTelemetry::SDK::Trace::Samplers::Result.new(
      decision: decision,
      tracestate: OpenTelemetry::Trace.current_span(parent_context).context.tracestate
    )
  end

  def description
    "CustomSampler{high_priority_rate=#{@high_priority_rate}, default_rate=#{@default_rate}}"
  end

  private

  def high_priority?(attributes)
    !attributes.nil? && attributes['priority'] == 'high'
  end

  # Deterministic decision from the low 8 bytes of the trace ID. The slice is
  # an unsigned 64-bit integer, so the threshold must scale with 2**64;
  # scaling with 2**63 would halve the effective sampling rate.
  def sample?(trace_id, rate)
    return true if rate >= 1.0
    return false if rate <= 0.0

    trace_id[8, 8].unpack1('Q>') < (rate * TRACE_ID_RANGE).ceil
  end
end
# Use custom sampler
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'my-service'
end

# The sampler is assigned on the tracer provider once the SDK is configured.
OpenTelemetry.tracer_provider.sampler =
  CustomSampler.new(high_priority_rate: 1.0, default_rate: 0.1)
Sampler with caching
# Caps the number of positive sampling decisions per one-second window.
# The counter is shared by all threads and guarded by a Mutex.
#
# Note: every call to #should_sample? counts against the budget, and the
# parent's sampling decision is not consulted here.
class CachedRateLimitSampler
  # Length of the counting window in seconds.
  WINDOW_SECONDS = 1.0

  # @param max_traces_per_second [Integer] positive decisions allowed per window
  def initialize(max_traces_per_second: 100)
    @max_traces_per_second = max_traces_per_second
    @trace_count = 0
    @last_reset = monotonic_now
    @mutex = Mutex.new
  end

  # @return [OpenTelemetry::SDK::Trace::Samplers::Result]
  def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
    decisions = OpenTelemetry::SDK::Trace::Samplers::Decision
    decision = within_budget? ? decisions::RECORD_AND_SAMPLE : decisions::DROP

    # A Result carries the tracestate to propagate; reuse the parent's.
    OpenTelemetry::SDK::Trace::Samplers::Result.new(
      decision: decision,
      tracestate: OpenTelemetry::Trace.current_span(parent_context).context.tracestate
    )
  end

  def description
    "CachedRateLimitSampler{max_traces_per_second=#{@max_traces_per_second}}"
  end

  private

  # Consumes one unit of the current window's budget.
  #
  # @return [Boolean] true when the limit has not been reached yet
  def within_budget?
    @mutex.synchronize do
      now = monotonic_now
      # Reset counter every second
      if now - @last_reset >= WINDOW_SECONDS
        @trace_count = 0
        @last_reset = now
      end

      next false if @trace_count >= @max_traces_per_second

      @trace_count += 1
      true
    end
  end

  # Monotonic time is immune to wall-clock adjustments (NTP, DST), which
  # could otherwise stall or prematurely reset the window.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
# Usage
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'high-traffic-service'
end

# The sampler is assigned on the tracer provider once the SDK is configured.
OpenTelemetry.tracer_provider.sampler =
  CachedRateLimitSampler.new(max_traces_per_second: 50)
Debugging sampling
Check sampling
# Starts a span named +span_name+ on a dedicated debug tracer and prints its
# trace ID, sampled flag and recording state (or a notice when the span
# context is not valid).
#
# @param span_name [String] name of the span to create
# @return [nil]
def log_sampling_decision(span_name)
  debug_tracer = OpenTelemetry.tracer_provider.tracer('debug_sampler', '1.0.0')

  debug_tracer.in_span(span_name) do |span|
    unless span.context.valid?
      puts "Span '#{span_name}' - Invalid context (likely dropped)"
      next
    end

    ctx = span.context
    puts "Span '#{span_name}' - Trace ID: #{ctx.hex_trace_id}"
    puts "Sampled: #{ctx.trace_flags.sampled?}"
    puts "Recording: #{span.recording?}"
  end
end
# Test sampling: emit ten spans and print the decision made for each one
(0...10).each { |index| log_sampling_decision("test-span-#{index}") }
Monitor sampling rates
# Thread-safe counter of observed vs. sampled spans, for checking the
# effective sampling rate at runtime.
class SamplingMonitor
  def initialize
    @total_spans = 0
    @sampled_spans = 0
    @start_time = monotonic_now
    @mutex = Mutex.new
  end

  # Counts +span+, and counts it as sampled when its context is valid and
  # carries the sampled trace flag.
  #
  # @param span [#context] a span
  def record_span(span)
    context = span.context
    sampled = context.valid? && context.trace_flags.sampled?

    @mutex.synchronize do
      @total_spans += 1
      @sampled_spans += 1 if sampled
    end
  end

  # @return [Float] sampled / total, or 0.0 when nothing was recorded
  def sampling_rate
    total, sampled = snapshot
    rate_for(sampled, total)
  end

  # @return [Hash] counters, sampling rate, elapsed seconds and throughput
  def stats
    # Ruby's Mutex is not reentrant: calling #sampling_rate while holding the
    # lock raises ThreadError. Take one consistent snapshot under the lock and
    # compute everything else outside of it.
    total, sampled = snapshot
    elapsed = monotonic_now - @start_time

    {
      total_spans: total,
      sampled_spans: sampled,
      sampling_rate: rate_for(sampled, total),
      elapsed_seconds: elapsed,
      spans_per_second: elapsed > 0 ? total / elapsed : 0
    }
  end

  private

  # Reads both counters atomically.
  def snapshot
    @mutex.synchronize { [@total_spans, @sampled_spans] }
  end

  def rate_for(sampled, total)
    total.zero? ? 0.0 : sampled.to_f / total
  end

  # Monotonic time is the right clock for measuring elapsed durations.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
# Usage: create 100 spans, feed each into the monitor, then print the totals
monitor = SamplingMonitor.new
tracer = OpenTelemetry.tracer_provider.tracer('monitored_service', '1.0.0')

(0...100).each do |index|
  tracer.in_span("test-span-#{index}") do |active_span|
    monitor.record_span(active_span)
    sleep(0.01)
  end
end

puts(monitor.stats)
Sampling middleware
# Rack middleware that logs the sampling state of the current span for each
# request and periodically logs aggregate sampling statistics.
class SamplingDebugMiddleware
  # Aggregate stats are logged each time the span count reaches a multiple
  # of this value.
  STATS_LOG_INTERVAL = 100

  # @param app [#call] the next Rack application in the chain
  def initialize(app)
    @app = app
    @monitor = SamplingMonitor.new
  end

  # @param env [Hash] Rack environment
  # @return [Array] the downstream Rack response triple
  def call(env)
    recorded = record_sampling_info(Rack::Request.new(env))
    status, headers, body = @app.call(env)

    # Only log when this request actually advanced the counter; otherwise the
    # stats line would repeat on every request while the count is 0 or sits
    # on a multiple of the interval.
    log_stats if recorded

    [status, headers, body]
  end

  private

  # Log sampling info for debugging.
  #
  # @return [Boolean] true when a span with a valid context was recorded
  def record_sampling_info(request)
    current_span = OpenTelemetry::Trace.current_span
    return false unless current_span&.context&.valid?

    @monitor.record_span(current_span)
    Rails.logger.debug({
      message: 'Request sampling info',
      path: request.path,
      trace_id: current_span.context.hex_trace_id,
      sampled: current_span.context.trace_flags.sampled?,
      recording: current_span.recording?
    })
    true
  end

  # Log stats periodically
  def log_stats
    stats = @monitor.stats
    return unless (stats[:total_spans] % STATS_LOG_INTERVAL).zero?

    Rails.logger.info("Sampling stats: #{stats}")
  end
end
# Add to Rails application in development
# config/environments/development.rb
# `config.middleware.use` registers the middleware on the app's Rack stack.
Rails.application.configure do
config.middleware.use SamplingDebugMiddleware
end
Production considerations
Sampling in microservices
In a microservices architecture, sampling decisions should be made at the root of the trace and propagated to all services. This ensures consistent sampling across the entire distributed trace.
# Use ParentBased sampler in all services
class MicroserviceTracingConfig
  # Ratio of root traces sampled when no explicit ratio is given (10%).
  DEFAULT_ROOT_RATIO = 0.1

  # Configures tracing for one service.
  #
  # Root services (entry points) decide with the trace-ID ratio sampler;
  # downstream services respect the parent's sampling decision because the
  # ratio sampler is wrapped in a parent-based sampler.
  #
  # @param service_name [String] value for the service.name resource attribute
  # @param root_ratio [Float] ratio (0.0..1.0) applied to traces started here
  # @return [Object] the sampler installed on the tracer provider
  def self.setup_for_service(service_name, root_ratio: DEFAULT_ROOT_RATIO)
    OpenTelemetry::SDK.configure do |c|
      c.service_name = service_name
      c.use_all
    end

    # The sampler lives on the tracer provider, which exists only after
    # SDK.configure has run.
    samplers = OpenTelemetry::SDK::Trace::Samplers
    OpenTelemetry.tracer_provider.sampler =
      samplers.parent_based(root: samplers.trace_id_ratio_based(root_ratio))
  end
end
# Gateway service (entry point), then the downstream user and order services
%w[api-gateway user-service order-service].each do |service|
  MicroserviceTracingConfig.setup_for_service(service)
end
Performance impact
Sampling reduces the performance overhead of tracing:
- CPU usage: Fewer spans to process and export
- Memory usage: Smaller trace buffers
- Network usage: Less data sent to backend
- Storage costs: Reduced storage requirements
Memory-aware sampling
# Ratio sampler that halves its rate while the process's memory usage is
# above a threshold. Decisions are deterministic per trace ID for a given
# rate.
#
# Note: memory usage is read on every call, and the parent's sampling
# decision is not consulted here.
class MemoryAwareSampler
  # Number of distinct values of the 64-bit trace-ID slice used for sampling.
  TRACE_ID_RANGE = 2**64
  # Multiplier applied to the base rate when memory is above the threshold.
  HIGH_MEMORY_FACTOR = 0.5
  # Assumed bytes per heap page for the GC-based estimate (approximation).
  HEAP_PAGE_BYTES = 16_384

  # @param base_rate [Float] ratio (0.0..1.0) used under normal memory usage
  # @param memory_threshold_mb [Numeric] usage (MB) above which the rate is reduced
  def initialize(base_rate: 0.1, memory_threshold_mb: 500)
    @base_rate = base_rate
    @memory_threshold_mb = memory_threshold_mb
  end

  # @param trace_id [String] binary 16-byte trace ID
  # @return [OpenTelemetry::SDK::Trace::Samplers::Result]
  def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
    # Adjust sampling rate based on memory usage
    rate = @base_rate
    rate *= HIGH_MEMORY_FACTOR if get_memory_usage_mb > @memory_threshold_mb

    decisions = OpenTelemetry::SDK::Trace::Samplers::Decision
    decision = sample?(trace_id, rate) ? decisions::RECORD_AND_SAMPLE : decisions::DROP

    # A Result carries the tracestate to propagate; reuse the parent's.
    OpenTelemetry::SDK::Trace::Samplers::Result.new(
      decision: decision,
      tracestate: OpenTelemetry::Trace.current_span(parent_context).context.tracestate
    )
  end

  def description
    "MemoryAwareSampler{base_rate=#{@base_rate}, threshold=#{@memory_threshold_mb}MB}"
  end

  private

  # Deterministic decision from the low 8 bytes of the trace ID. The slice is
  # an unsigned 64-bit integer, so the threshold must scale with 2**64;
  # scaling with 2**63 would halve the effective sampling rate.
  def sample?(trace_id, rate)
    return true if rate >= 1.0
    return false if rate <= 0.0

    trace_id[8, 8].unpack1('Q>') < (rate * TRACE_ID_RANGE).ceil
  end

  # Best-effort memory usage of this process in MB; 0 when it cannot be
  # determined, so sampling falls back to the base rate.
  def get_memory_usage_mb
    rss_mb = proc_status_rss_mb
    return rss_mb if rss_mb

    # Fallback: use GC stats if available
    return 0 unless GC.respond_to?(:stat)

    heap_pages = GC.stat[:heap_allocated_pages] || 0
    heap_pages * HEAP_PAGE_BYTES / 1024 / 1024 # Approximate MB
  rescue StandardError
    0 # Return 0 if unable to determine memory usage
  end

  # Resident set size from /proc/<pid>/status, in MB.
  #
  # @return [Integer, nil] nil when the file or the VmRSS line is missing
  def proc_status_rss_mb
    status_path = "/proc/#{Process.pid}/status"
    return nil unless File.exist?(status_path)

    rss_kb = File.read(status_path)[/VmRSS:\s+(\d+)\s+kB/, 1]
    rss_kb && rss_kb.to_i / 1024 # Convert KB to MB
  end
end
# Usage
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'memory-sensitive-service'
end

# The sampler is assigned on the tracer provider once the SDK is configured.
OpenTelemetry.tracer_provider.sampler =
  MemoryAwareSampler.new(base_rate: 0.1, memory_threshold_mb: 400)
Sampling strategies
# Table-driven choice of sampler per deployment environment.
class EnvironmentBasedSampling
  # Sampling ratio and a human-readable rationale per environment.
  SAMPLING_CONFIGS = {
    'production' => {
      rate: 0.01, # 1% sampling in production
      description: 'Conservative sampling for production load'
    }.freeze,
    'staging' => {
      rate: 0.1, # 10% sampling in staging
      description: 'Moderate sampling for staging validation'
    }.freeze,
    'development' => {
      rate: 1.0, # 100% sampling in development
      description: 'Full sampling for development debugging'
    }.freeze,
    'test' => {
      rate: 0.0, # No sampling in tests
      description: 'Disabled sampling for test performance'
    }.freeze
  }.freeze

  # Environment whose settings apply to unknown environment names.
  FALLBACK_ENVIRONMENT = 'development'

  # Looks up the settings for +env+, falling back to the development
  # settings. Used by both the sampler selection and the log message so the
  # two can never disagree.
  #
  # @param env [#to_s] environment name
  # @return [Hash] frozen hash with :rate and :description
  def self.config_for(env)
    SAMPLING_CONFIGS.fetch(env.to_s) { SAMPLING_CONFIGS.fetch(FALLBACK_ENVIRONMENT) }
  end

  # @param env [#to_s] environment name
  # @return [Object] a sampler from OpenTelemetry::SDK::Trace::Samplers
  def self.sampler_for_environment(env)
    samplers = OpenTelemetry::SDK::Trace::Samplers
    rate = config_for(env)[:rate]

    if rate <= 0.0
      samplers::ALWAYS_OFF
    elsif rate >= 1.0
      samplers::ALWAYS_ON
    else
      samplers.parent_based(root: samplers.trace_id_ratio_based(rate))
    end
  end

  # Configures the SDK for the current Rails environment and installs the
  # matching sampler on the tracer provider.
  #
  # @return [Object] the installed sampler
  def self.configure_for_rails
    env = Rails.env
    config = config_for(env)
    Rails.logger.info("Configuring OpenTelemetry sampling for #{env}: #{config[:description]}")

    OpenTelemetry::SDK.configure do |c|
      c.service_name = Rails.application.class.module_parent_name.downcase
      c.use_all
    end

    # The sampler lives on the tracer provider, which exists only after
    # SDK.configure has run.
    OpenTelemetry.tracer_provider.sampler = sampler_for_environment(env)
  end
end
# Usage in Rails initializer
# Reads Rails.env, logs the chosen strategy and configures the SDK.
EnvironmentBasedSampling.configure_for_rails
Background job
# Ratio sampler whose rate depends on a job's priority. The priority comes
# from the 'job.priority' span attribute or, failing that, is inferred from
# the span name. Decisions are deterministic per trace ID.
#
# Note: the parent's sampling decision is not consulted here.
class BackgroundJobSampler
  # Number of distinct values of the 64-bit trace-ID slice used for sampling.
  TRACE_ID_RANGE = 2**64

  # Built-in rates; entries passed to #initialize override or extend these.
  DEFAULT_RATES = {
    'high_priority' => 1.0, # Always sample high priority jobs
    'normal' => 0.1,        # 10% of normal jobs
    'low_priority' => 0.01  # 1% of low priority jobs
  }.freeze

  # @param job_sampling_rates [Hash{String => Float}] priority name => ratio
  def initialize(job_sampling_rates = {})
    @job_sampling_rates = DEFAULT_RATES.merge(job_sampling_rates)
  end

  # @param trace_id [String] binary 16-byte trace ID
  # @param name [String] span name, used to infer the priority
  # @param attributes [Hash, nil] initial span attributes
  # @return [OpenTelemetry::SDK::Trace::Samplers::Result]
  def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
    # Determine job priority from attributes or span name; priorities without
    # a configured rate use the 'normal' rate.
    priority = determine_job_priority(name, attributes)
    rate = @job_sampling_rates[priority] || @job_sampling_rates['normal']

    decisions = OpenTelemetry::SDK::Trace::Samplers::Decision
    decision = sample?(trace_id, rate) ? decisions::RECORD_AND_SAMPLE : decisions::DROP

    # A Result carries the tracestate to propagate; reuse the parent's.
    OpenTelemetry::SDK::Trace::Samplers::Result.new(
      decision: decision,
      tracestate: OpenTelemetry::Trace.current_span(parent_context).context.tracestate
    )
  end

  def description
    "BackgroundJobSampler{rates=#{@job_sampling_rates}}"
  end

  private

  # Deterministic decision from the low 8 bytes of the trace ID. The slice is
  # an unsigned 64-bit integer, so the threshold must scale with 2**64;
  # scaling with 2**63 would halve the effective sampling rate.
  def sample?(trace_id, rate)
    return true if rate >= 1.0
    return false if rate <= 0.0

    trace_id[8, 8].unpack1('Q>') < (rate * TRACE_ID_RANGE).ceil
  end

  # @return [String] priority key to look up in the rate table
  def determine_job_priority(name, attributes)
    # Check attributes first
    return attributes['job.priority'] if attributes&.key?('job.priority')

    # Infer from job name
    case name
    when /urgent|critical|high/i
      'high_priority'
    when /low|batch|cleanup/i
      'low_priority'
    else
      'normal'
    end
  end
end
# Configure for Sidekiq
OpenTelemetry::SDK.configure do |c|
  c.service_name = 'background-workers'
  c.use_all
end

# Override the built-in rates. The keys must be the priority names the
# sampler resolves ('high_priority', 'normal', 'low_priority'); other keys
# only apply to jobs that set a matching 'job.priority' span attribute.
# The sampler is assigned on the tracer provider once the SDK is configured.
OpenTelemetry.tracer_provider.sampler = BackgroundJobSampler.new({
  'high_priority' => 1.0,
  'normal' => 0.05,
  'low_priority' => 0.001
})