OpenTelemetry Erlang/Elixir Tracing API

Most Erlang/Elixir applications are covered by automatic instrumentation through libraries like OpenTelemetry Phoenix, OpenTelemetry Ecto, and OpenTelemetry Cowboy. These libraries automatically create spans for HTTP requests, database queries, and web server operations without requiring manual code changes.

However, there are cases where manual instrumentation is necessary to trace custom business logic, external API calls, background processing, and application-specific operations. This guide covers practical examples of manual tracing using the OpenTelemetry Erlang/Elixir API.

Prerequisites

Ensure you have OpenTelemetry configured in your application. For setup instructions, see Monitor OpenTelemetry Erlang/Elixir with Uptrace.

Basic manual instrumentation

Creating spans in Elixir

Use OpenTelemetry.Tracer.with_span/2 to create manual spans:

elixir
require OpenTelemetry.Tracer

# Runs the user-processing pipeline inside a "process_user_data" span.
#
# The span is tagged with the user id before any work starts and with the
# number of produced items once processing is done. The value of the block
# is the processing result.
def process_user_data(user_id) do
  OpenTelemetry.Tracer.with_span "process_user_data" do
    # Tag the span with the user first so the context is already attached
    # while the business logic runs.
    OpenTelemetry.Tracer.set_attribute("user.id", user_id)

    # Business logic: load the user, then process it.
    processed =
      user_id
      |> fetch_user()
      |> complex_processing()

    # Record how many items the processing step produced.
    result_count = length(processed)
    OpenTelemetry.Tracer.set_attribute("processing.result_count", result_count)

    processed
  end
end

Creating spans in Erlang

Use the ?with_span macro in Erlang:

erlang
-include_lib("opentelemetry_api/include/otel_tracer.hrl").

%% Runs the user-processing pipeline inside a "process_user_data" span and
%% returns the result of complex_processing/1.
process_user_data(UserId) ->
    % The fun given to ?with_span is called with the new span context as its
    % single argument, so it must have arity 1.
    ?with_span(<<"process_user_data">>, #{}, fun(_SpanCtx) ->
        % Add attributes
        ?set_attribute(<<"user.id">>, UserId),

        % Your business logic here
        User = fetch_user(UserId),
        Result = complex_processing(User),

        % Add attributes based on results
        ?set_attribute(<<"processing.result_count">>, length(Result)),

        Result
    end).

HTTP client instrumentation

When you make external HTTP requests with a client that has no automatic instrumentation, wrap the call in a span and record the request and response details yourself:

Elixir with HTTPoison

elixir
defmodule MyApp.HTTPClient do
  @moduledoc """
  Example HTTP client that wraps `HTTPoison.get/3` in a manually created span.
  """

  require OpenTelemetry.Tracer

  @doc """
  Performs a GET request for `url` inside an `"http_client_request"` span.

  Returns `{:ok, body}` for any HTTP response; the span status is set to
  error when the status code is 400 or above. Returns `{:error, reason}` when
  the request itself fails, after recording the failure on the span.
  """
  def fetch_external_data(url) do
    OpenTelemetry.Tracer.with_span "http_client_request" do
      # Add HTTP semantic attributes
      # NOTE(review): "http.user_agent" is recorded here, but no User-Agent
      # header is passed to HTTPoison below — confirm the two should match.
      OpenTelemetry.Tracer.set_attributes(%{
        "http.method" => "GET",
        "http.url" => url,
        "http.user_agent" => "MyApp/1.0"
      })

      start_time = System.monotonic_time(:millisecond)

      case HTTPoison.get(url, [], timeout: 30_000) do
        {:ok, %HTTPoison.Response{status_code: status_code, body: body}} ->
          duration = System.monotonic_time(:millisecond) - start_time

          # Add response attributes
          OpenTelemetry.Tracer.set_attributes(%{
            "http.status_code" => status_code,
            "http.response_content_length" => byte_size(body),
            "http.request_duration_ms" => duration
          })

          if status_code >= 400 do
            OpenTelemetry.Tracer.set_status(:error, "HTTP #{status_code}")
          end

          {:ok, body}

        {:error, %HTTPoison.Error{reason: reason}} ->
          # The reason is not always an atom (it can be a tuple), and such
          # terms cannot be interpolated or passed to to_string/1 directly.
          reason_text = format_reason(reason)

          OpenTelemetry.Tracer.set_status(:error, "HTTP request failed: #{reason_text}")

          OpenTelemetry.Tracer.add_event("http.request.error", %{
            "error.type" => "connection_error",
            "error.message" => reason_text
          })

          {:error, reason}
      end
    end
  end

  # Renders an error reason as text: atoms and binaries keep their plain
  # string form, any other term is rendered with inspect/1.
  defp format_reason(reason) when is_atom(reason) or is_binary(reason), do: to_string(reason)
  defp format_reason(reason), do: inspect(reason)
end

Erlang with httpc

erlang
-include_lib("opentelemetry_api/include/otel_tracer.hrl").

%% Performs a GET request for Url inside an "http_client_request" span.
%%
%% Returns {ok, Body} for any HTTP response (the span status is set to error
%% for status codes of 400 and above) and {error, Reason} when the request
%% itself fails.
fetch_external_data(Url) ->
    % The fun given to ?with_span is called with the new span context as its
    % single argument, so it must have arity 1.
    ?with_span(<<"http_client_request">>, #{}, fun(_SpanCtx) ->
        % Add HTTP semantic attributes.
        % The URL is converted to a binary so that a charlist URL is recorded
        % as text rather than as a list of integers.
        ?set_attribute(<<"http.method">>, <<"GET">>),
        ?set_attribute(<<"http.url">>, unicode:characters_to_binary(Url)),

        StartTime = erlang:monotonic_time(millisecond),

        case httpc:request(get, {Url, []}, [{timeout, 30000}], []) of
            {ok, {{_Version, StatusCode, _ReasonPhrase}, _Headers, Body}} ->
                Duration = erlang:monotonic_time(millisecond) - StartTime,

                % Add response attributes.
                % No body_format option is passed, so Body may be a list;
                % iolist_size/1 handles both lists and binaries.
                ?set_attribute(<<"http.status_code">>, StatusCode),
                ?set_attribute(<<"http.response_content_length">>, iolist_size(Body)),
                ?set_attribute(<<"http.request_duration_ms">>, Duration),

                % NOTE(review): ?OTEL_STATUS_ERROR is expected to come from
                % opentelemetry.hrl — confirm that header is included.
                case StatusCode >= 400 of
                    true ->
                        ?set_status(?OTEL_STATUS_ERROR, <<"HTTP error">>);
                    false ->
                        ok
                end,

                {ok, Body};

            {error, Reason} ->
                % Reason is not always an atom (it can be a tuple), so format
                % it as a printed term instead of calling atom_to_binary/1.
                ReasonText = iolist_to_binary(io_lib:format("~p", [Reason])),

                ?set_status(?OTEL_STATUS_ERROR, <<"HTTP request failed">>),
                ?add_event(<<"http.request.error">>, #{
                    <<"error.type">> => <<"connection_error">>,
                    <<"error.message">> => ReasonText
                }),

                {error, Reason}
        end
    end).

Background job processing

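Trace background job execution with custom spans. The current span context is stored per process, so a span started inside a GenServer callback is not automatically a child of the caller's span unless you pass the context along explicitly: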

Elixir with GenServer

elixir
defmodule MyApp.JobProcessor do
  @moduledoc """
  Example GenServer that traces each background job with a `"background_job"` span.
  """

  use GenServer
  require OpenTelemetry.Tracer

  # Handles `{:process_job, job_id, job_data}` casts.
  #
  # Runs the job inside a span, records its outcome and duration, and replies
  # with `{:noreply, state}`. If the job raises, the failure is recorded on
  # the span and the exception is re-raised with its original stacktrace.
  @impl true
  def handle_cast({:process_job, job_id, job_data}, state) do
    OpenTelemetry.Tracer.with_span "background_job" do
      # Add job context attributes
      OpenTelemetry.Tracer.set_attributes(%{
        "job.id" => job_id,
        "job.type" => job_data.type,
        "job.priority" => job_data.priority,
        "worker.pid" => inspect(self())
      })

      start_time = System.monotonic_time(:millisecond)

      try do
        result = execute_job(job_data)
        duration = System.monotonic_time(:millisecond) - start_time

        # Add success metrics
        OpenTelemetry.Tracer.set_attributes(%{
          "job.status" => "completed",
          "job.duration_ms" => duration,
          "job.result_size" => map_size(result)
        })

        OpenTelemetry.Tracer.add_event("job.completed", %{
          "job.id" => job_id,
          "processing_time" => duration
        })

        {:noreply, state}
      rescue
        error ->
          duration = System.monotonic_time(:millisecond) - start_time

          # Add error information
          OpenTelemetry.Tracer.set_status(:error, "Job processing failed")

          OpenTelemetry.Tracer.set_attributes(%{
            "job.status" => "failed",
            "job.duration_ms" => duration,
            # inspect/1 gives the module name as text, without the
            # "Elixir." prefix the raw atom carries.
            "error.type" => inspect(error.__struct__),
            "error.message" => Exception.message(error)
          })

          OpenTelemetry.Tracer.add_event("job.failed", %{
            "job.id" => job_id,
            "error.details" => Exception.format(:error, error, __STACKTRACE__)
          })

          # Re-raise so the failure is not hidden by the tracing code.
          reraise error, __STACKTRACE__
      end
    end
  end

  # Stand-in for real job work: sleeps for 100 ms and returns a fixed result
  # map. The job data is ignored, hence the underscore prefix.
  defp execute_job(_job_data) do
    # Simulate job processing
    :timer.sleep(100)
    %{status: :ok, processed_items: 42}
  end
end

Erlang with gen_server

erlang
-behaviour(gen_server).
-include_lib("opentelemetry_api/include/otel_tracer.hrl").

%% Handles {process_job, JobId, JobData} casts.
%%
%% Runs the job inside a "background_job" span, records its outcome and
%% duration, and returns {noreply, State}. If the job fails, the failure is
%% recorded on the span and re-raised with its original class, reason and
%% stacktrace.
handle_cast({process_job, JobId, JobData}, State) ->
    % The fun given to ?with_span is called with the new span context as its
    % single argument, so it must have arity 1.
    ?with_span(<<"background_job">>, #{}, fun(_SpanCtx) ->
        % Add job context attributes
        ?set_attribute(<<"job.id">>, JobId),
        ?set_attribute(<<"job.type">>, maps:get(type, JobData)),
        ?set_attribute(<<"worker.pid">>, list_to_binary(pid_to_list(self()))),

        StartTime = erlang:monotonic_time(millisecond),

        try execute_job(JobData) of
            _Result ->
                Duration = erlang:monotonic_time(millisecond) - StartTime,

                % Add success metrics
                ?set_attribute(<<"job.status">>, <<"completed">>),
                ?set_attribute(<<"job.duration_ms">>, Duration),

                ?add_event(<<"job.completed">>, #{
                    <<"job.id">> => JobId,
                    <<"processing_time">> => Duration
                }),

                {noreply, State}
        catch
            Class:Reason:Stacktrace ->
                FailDuration = erlang:monotonic_time(millisecond) - StartTime,

                % term_to_binary/1 would produce external-term-format bytes;
                % format the reason as a printed term so it is readable text.
                ReasonText = iolist_to_binary(io_lib:format("~p", [Reason])),

                % Add error information.
                % NOTE(review): ?OTEL_STATUS_ERROR is expected to come from
                % opentelemetry.hrl — confirm that header is included.
                ?set_status(?OTEL_STATUS_ERROR, <<"Job processing failed">>),
                ?set_attribute(<<"job.status">>, <<"failed">>),
                ?set_attribute(<<"job.duration_ms">>, FailDuration),
                ?set_attribute(<<"error.type">>, atom_to_binary(Class)),

                ?add_event(<<"job.failed">>, #{
                    <<"job.id">> => JobId,
                    <<"error.class">> => atom_to_binary(Class),
                    <<"error.reason">> => ReasonText
                }),

                % Re-raise so the failure is not hidden by the tracing code.
                erlang:raise(Class, Reason, Stacktrace)
        end
    end).

%% Stand-in for real job work: the job data is ignored, the process sleeps
%% for 100 ms, and a fixed result map is returned.
execute_job(_JobData) ->
    SimulatedWorkMs = 100,
    timer:sleep(SimulatedWorkMs),
    JobResult = #{processed_items => 42, status => ok},
    JobResult.

Database operations without Ecto

For custom database operations or when using database drivers directly:

Elixir with Postgrex

elixir
defmodule MyApp.CustomDB do
  @moduledoc """
  Example of tracing queries issued directly through Postgrex.
  """

  require OpenTelemetry.Tracer

  @doc """
  Runs `query` with `params` through `Postgrex.query/3` inside a `"db.query"` span.

  Returns `{:ok, result}` on success and `{:error, error}` on failure. The
  query duration in microseconds is recorded on the span in both cases.
  """
  def execute_complex_query(query, params) do
    OpenTelemetry.Tracer.with_span "db.query" do
      # Add database semantic attributes
      OpenTelemetry.Tracer.set_attributes(%{
        "db.system" => "postgresql",
        # Derived from the statement so non-SELECT queries are labelled correctly.
        "db.operation" => operation_name(query),
        "db.statement" => query,
        "db.name" => "myapp_production"
      })

      start_time = System.monotonic_time(:microsecond)

      case Postgrex.query(MyApp.Repo, query, params) do
        {:ok, %Postgrex.Result{num_rows: num_rows} = result} ->
          duration = System.monotonic_time(:microsecond) - start_time

          OpenTelemetry.Tracer.set_attributes(%{
            "db.rows_affected" => num_rows,
            "db.query_duration_us" => duration
          })

          if num_rows == 0 do
            OpenTelemetry.Tracer.add_event("db.no_results", %{
              "query" => String.slice(query, 0, 100)
            })
          end

          {:ok, result}

        # Match every error, not just %Postgrex.Error{}: other error structs
        # would otherwise fall through the case and raise CaseClauseError.
        {:error, error} ->
          duration = System.monotonic_time(:microsecond) - start_time

          OpenTelemetry.Tracer.set_status(:error, "Database query failed")

          OpenTelemetry.Tracer.set_attributes(%{
            "db.query_duration_us" => duration,
            "error.type" => "database_error",
            "error.message" => error_message(error)
          })

          {:error, error}
      end
    end
  end

  # Returns the first word of the statement in upper case (e.g. "SELECT").
  defp operation_name(query) do
    query
    |> String.trim_leading()
    |> String.split(~r/\s+/, parts: 2)
    |> hd()
    |> String.upcase()
  end

  # Renders an error as text. Exception.message/1 is used for exceptions
  # because the struct's own `message` field is not guaranteed to be set.
  defp error_message(error) when is_exception(error), do: Exception.message(error)
  defp error_message(error), do: inspect(error)
end

File operations and I/O

Trace file system operations and data processing:

Elixir file processing

elixir
defmodule MyApp.FileProcessor do
  @moduledoc """
  Example of tracing file processing with a parent span and per-chunk child spans.
  """

  require OpenTelemetry.Tracer

  @doc """
  Processes the file at `file_path` line by line inside a `"file.process"` span.

  Lines are handled in chunks of 100, each in its own `"file.chunk.process"`
  span. Returns `{:ok, processed_lines}` on success, or `{:error, reason}`
  when `File.stat/1` fails.
  """
  def process_large_file(file_path) do
    OpenTelemetry.Tracer.with_span "file.process" do
      OpenTelemetry.Tracer.set_attributes(%{
        "file.path" => file_path,
        "operation.type" => "batch_processing"
      })

      case File.stat(file_path) do
        {:ok, %File.Stat{size: size}} ->
          OpenTelemetry.Tracer.set_attribute("file.size_bytes", size)

          start_time = System.monotonic_time(:millisecond)

          # Stream by line (the default) so the count reported below really
          # is a number of lines rather than a number of fixed-size byte blocks.
          processed_lines =
            file_path
            |> File.stream!()
            |> Stream.chunk_every(100)
            |> Enum.reduce(0, &process_chunk/2)

          duration = System.monotonic_time(:millisecond) - start_time

          OpenTelemetry.Tracer.set_attributes(
            processing_attributes(processed_lines, duration)
          )

          OpenTelemetry.Tracer.add_event("file.processing.completed", %{
            "lines_processed" => processed_lines,
            "file_size" => size
          })

          {:ok, processed_lines}

        {:error, reason} ->
          OpenTelemetry.Tracer.set_status(:error, "File access failed")
          OpenTelemetry.Tracer.set_attribute("error.type", "file_error")
          OpenTelemetry.Tracer.set_attribute("error.reason", to_string(reason))

          {:error, reason}
      end
    end
  end

  # Builds the summary attributes for a finished run. The rate is left out
  # when the measured duration is 0 ms (e.g. an empty file), because
  # computing it would divide by zero.
  defp processing_attributes(processed_lines, duration) do
    attributes = %{
      "file.processed_lines" => processed_lines,
      "file.processing_duration_ms" => duration
    }

    if duration > 0 do
      Map.put(
        attributes,
        "file.processing_rate_lines_per_sec",
        round(processed_lines / (duration / 1000))
      )
    else
      attributes
    end
  end

  # Reducer for one chunk: processes it in a nested span and adds its size
  # to the running total in `acc`.
  defp process_chunk(chunk, acc) do
    # Add a nested span for chunk processing
    OpenTelemetry.Tracer.with_span "file.chunk.process" do
      chunk_size = length(chunk)
      OpenTelemetry.Tracer.set_attribute("chunk.size", chunk_size)

      # Simulate processing
      :timer.sleep(10)

      acc + chunk_size
    end
  end
end

Nested spans and context propagation

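Create hierarchical spans to trace complex operations. A span started while another span is active in the same process becomes its child, so each helper function below only needs its own with_span call: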

Elixir nested operations

elixir
defmodule MyApp.OrderProcessor do
  @moduledoc """
  Example of nested spans: one parent span per order with a child span per step.
  """

  require OpenTelemetry.Tracer

  @doc """
  Processes the order identified by `order_id` inside an `"order.process"` span.

  Runs validation, payment, inventory update and confirmation in that order,
  each in its own span, and returns `{:ok, map}` with the order and the
  result of each step.
  """
  def process_order(order_id) do
    OpenTelemetry.Tracer.with_span "order.process" do
      OpenTelemetry.Tracer.set_attributes(%{
        "order.id" => order_id,
        "service.operation" => "order_processing"
      })

      # Each step creates its own child span
      order = validate_order(order_id)
      payment_result = process_payment(order)
      inventory_result = update_inventory(order)
      notification_result = send_confirmation(order)

      OpenTelemetry.Tracer.set_attribute("order.status", "completed")

      {:ok,
       %{
         order: order,
         payment: payment_result,
         inventory: inventory_result,
         notification: notification_result
       }}
    end
  end

  # Simulated validation step; returns a fixed order map for `order_id`.
  defp validate_order(order_id) do
    OpenTelemetry.Tracer.with_span "order.validate" do
      OpenTelemetry.Tracer.set_attribute("order.id", order_id)

      # Validation logic
      :timer.sleep(50)

      OpenTelemetry.Tracer.add_event("order.validation.completed")
      %{id: order_id, items: 3, total: 99.99}
    end
  end

  # Simulated payment step; records the order total and returns
  # `{:ok, transaction_id}` with a fixed transaction id.
  defp process_payment(order) do
    OpenTelemetry.Tracer.with_span "payment.process" do
      OpenTelemetry.Tracer.set_attributes(%{
        "payment.amount" => order.total,
        "payment.currency" => "USD"
      })

      # Payment processing
      :timer.sleep(200)

      # Bound once so the event and the return value cannot drift apart.
      transaction_id = "txn_123456"

      OpenTelemetry.Tracer.add_event("payment.processed", %{
        "transaction.id" => transaction_id
      })

      {:ok, transaction_id}
    end
  end

  # Simulated inventory step; records the item count and returns `{:ok, :updated}`.
  defp update_inventory(order) do
    OpenTelemetry.Tracer.with_span "inventory.update" do
      OpenTelemetry.Tracer.set_attribute("inventory.items_count", order.items)

      # Inventory update
      :timer.sleep(100)

      OpenTelemetry.Tracer.add_event("inventory.updated")
      {:ok, :updated}
    end
  end

  # Simulated confirmation step; returns `{:ok, :sent}`. The order is not
  # read here (the recipient is fixed), hence the underscore prefix.
  defp send_confirmation(_order) do
    OpenTelemetry.Tracer.with_span "notification.send" do
      OpenTelemetry.Tracer.set_attributes(%{
        "notification.type" => "email",
        "notification.recipient" => "customer@example.com"
      })

      # Send notification
      :timer.sleep(150)

      OpenTelemetry.Tracer.add_event("notification.sent")
      {:ok, :sent}
    end
  end
end

What's next?